25117204947

Committed 29 Apr 2026 03:11PM UTC coverage: 79.818% (+0.8%) from 78.972%

Build # 25117204947

Build Type

push

github

Committed by

christophevg

Commit Message

feat(tools): implement ExistenceTool for file/folder existence checks

Adds ExistenceTool for checking if files or folders exist with security hardening:
- Structured JSON output with exists, type, and path fields
- Symlink rejection to prevent path traversal attacks
- PathGuardrail integration for path containment validation
- Generic error messages to prevent information disclosure
- Expanded default blocked patterns (3 to 13)
- Updated ToolResult type to support structured results
- Added 28 comprehensive unit tests

🤖 Implemented together with a coding agent.

Coverage Stats

716 of 876 branches covered (81.74%)

Branch coverage included in aggregate %.

60 of 64 new or added lines in 6 files covered. (93.75%)

267 existing lines in 12 files now uncovered.

4370 of 5496 relevant lines covered (79.51%)

0.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.11

/src/yoker/context/basic.py

"""Basic persistence context manager implementation.

Provides JSONL-based context persistence with secure file handling.

Note:
  File locking uses fcntl (Unix-only). On Windows, file writes
  are still atomic but lack inter-process locking protection.
  For production use on Windows, consider adding a cross-platform
  file locking library.
"""

import json
import os
from datetime import datetime
from pathlib import Path
from typing import Any

from yoker.context.interface import ContextStatistics
from yoker.context.validator import is_safe_path, validate_session_id, validate_storage_path
from yoker.exceptions import ContextCorruptionError, SessionNotFoundError
from yoker.logging import get_logger

# Module-level logger; calls in this file pass an event name plus keyword fields.
log = get_logger(__name__)

# Permission bits applied to the storage directory and the session files.
DIR_MODE = 0o700  # Owner-only (rwx) for the storage directory
FILE_MODE = 0o600  # Owner-only (rw) for session JSONL files


class BasicPersistenceContextManager:
  """Context manager with JSONL persistence.

  Stores conversation history in JSONL (JSON Lines) format with:
  - Appends written under an exclusive file lock and fsync'd to disk
  - Secure file permissions (owner-only directory and session file)
  - Session lifecycle tracking

  Record types written to the session file:
  - session_start: Session metadata (written by save() when the file is new)
  - message: User/assistant/system message
  - tool_call_message: Assistant message carrying tool calls
  - tool_result: Tool execution result
  - turn_start: Start-of-turn marker
  - turn_end: End-of-turn marker
  - session_end: Session termination marker (written by close())
  """

  def __init__(
    self,
    storage_path: Path | str,
    session_id: str = "auto",
  ) -> None:
    """Create an empty in-memory context bound to one session file.

    Nothing is written to disk here; only the session file path is computed.

    Args:
      storage_path: Directory for storing context files.
      session_id: Session ID or "auto" to generate.

    Raises:
      ValidationError: If storage_path or session_id is invalid.
    """
    # The storage path is validated before the session ID.
    checked_dir = validate_storage_path(Path(storage_path), "context.storage_path")
    self._storage_path = checked_dir

    checked_id = validate_session_id(session_id, "context.session_id")
    self._session_id = checked_id

    # One JSONL file per session, named after the session ID.
    self._file_path = checked_dir / f"{checked_id}.jsonl"

    # Ordered in-memory history; every entry is {"type": ..., "data": {...}}.
    self._sequence: list[dict[str, Any]] = []

    # Counters and timestamps reported by get_statistics().
    self._tool_call_count = 0
    self._last_turn_time: datetime | None = None
    self._start_time = datetime.now()

    log.debug(
      "context_initialized",
      session_id=self._session_id,
      storage_path=str(self._storage_path),
    )

  def get_session_id(self) -> str:
    """Get the unique session identifier."""
    return self._session_id

  def add_message(
    self,
    role: str,
    content: str,
    metadata: dict[str, Any] | None = None,
    thinking: str | None = None,
  ) -> None:
    """Add a message to the context.

    Args:
      role: Message role ("user", "assistant", "system").
      content: Message content.
      metadata: Optional metadata (e.g., images, files).
      thinking: Optional thinking/reasoning content (for assistant messages).
    """
    message: dict[str, Any] = {
      "role": role,
      "content": content,
    }
    if metadata:
      message["metadata"] = metadata
    if thinking:
      message["thinking"] = thinking

    self._sequence.append({"type": "message", "data": message})
    self._append_record("message", message)

  def add_tool_result(
    self,
    tool_name: str,
    tool_id: str,
    result: str,
    success: bool = True,
  ) -> None:
    """Add a tool execution result to the context."""
    tool_result: dict[str, Any] = {
      "tool_name": tool_name,
      "tool_id": tool_id,
      "result": result,
      "success": success,
    }

    self._sequence.append({"type": "tool_result", "data": tool_result})
    self._tool_call_count += 1
    self._append_record("tool_result", tool_result)

  def add_tool_calls(
    self,
    tool_calls: list[dict[str, Any]],
    thinking: str | None = None,
  ) -> None:
    """Add an assistant message with tool calls to the context.

    This must be called BEFORE add_tool_result() for each tool call.
    The assistant message with tool_calls is required by the LLM API
    to understand what tools were called before receiving results.

    Args:
      tool_calls: List of tool call dictionaries with 'name' and 'arguments'.
      thinking: Optional thinking/reasoning content from the assistant.
    """
    # Store as a special assistant message with tool_calls
    assistant_msg: dict[str, Any] = {
      "role": "assistant",
      "tool_calls": tool_calls,
      "content": "",  # Empty content when only tool calls
    }
    if thinking:
      assistant_msg["thinking"] = thinking

    self._sequence.append({"type": "message", "data": assistant_msg})
    self._append_record("tool_call_message", {"tool_calls": tool_calls, "thinking": thinking})

  def get_context(self) -> list[dict[str, Any]]:
    """Get the full context for backend submission.

    Returns messages and tool results in the correct order for the LLM API:
    - User messages
    - Assistant messages (with tool_calls if present)
    - Tool result messages (after the assistant message)
    """
    context: list[dict[str, Any]] = []

    for item in self._sequence:
      if item["type"] == "message":
        msg = item["data"]
        # Build message dict, including tool_calls and thinking if present
        message: dict[str, Any] = {
          "role": msg["role"],
          "content": msg.get("content", ""),
        }
        # Include tool_calls for assistant messages
        if msg["role"] == "assistant" and "tool_calls" in msg:
          message["tool_calls"] = msg["tool_calls"]
        # Include thinking for assistant messages
        if msg["role"] == "assistant" and "thinking" in msg:
          message["thinking"] = msg["thinking"]
        context.append(message)
      elif item["type"] == "tool_result":
        tr = item["data"]
        context.append(
          {
            "role": "tool",
            "name": tr["tool_name"],
            "content": tr["result"],
          }
        )

    return context

  def get_messages(self) -> list[dict[str, Any]]:
    """Get all recorded messages (excludes tool results)."""
    messages: list[dict[str, Any]] = []
    for item in self._sequence:
      if item["type"] == "message":
        messages.append(item["data"])
    return messages

  def start_turn(self, user_message: str) -> None:
    """Start a new conversation turn."""
    turn_record: dict[str, Any] = {
      "user_message": user_message,
      "start_time": datetime.now().isoformat(),
    }
    self._sequence.append({"type": "turn", "data": turn_record})
    self._append_record("turn_start", turn_record)

    # Add user message
    self.add_message("user", user_message)

  def end_turn(self, assistant_message: str, thinking: str | None = None) -> None:
    """End the current conversation turn.

    Args:
      assistant_message: The assistant's response content.
      thinking: Optional thinking/reasoning content from the assistant.
    """
    self._last_turn_time = datetime.now()

    # Add assistant message with optional thinking
    self.add_message("assistant", assistant_message, thinking=thinking)

    # Append turn end record
    self._append_record(
      "turn_end",
      {
        "assistant_message": assistant_message,
      },
    )

  def save(self) -> None:
    """Persist context to storage.

    Creates storage directory if needed and writes all records.
    """
    self._ensure_storage_directory()

    # Write session_start if file doesn't exist
    if not self._file_path.exists():
      self._write_session_start()

    # Flush any buffered writes
    self._flush_pending_records()

  def load(self) -> bool:
    """Load context from storage.

    Returns:
      True if context was loaded, False if no stored context exists.

    Raises:
      ContextCorruptionError: If stored context is corrupted.
    """
    if not self._file_path.exists():
      return False

    # Check if path is safe
    if not is_safe_path(self._storage_path, self._file_path):
      raise SessionNotFoundError(self._session_id)

    try:
      self._load_from_file()
      return True
    except json.JSONDecodeError as e:
      raise ContextCorruptionError(
        str(self._file_path),
        e.lineno or 0,
        f"Invalid JSON: {e.msg}",
      ) from None

  def clear(self) -> None:
    """Clear in-memory context (does not delete from storage)."""
    self._sequence.clear()
    self._tool_call_count = 0
    self._last_turn_time = None

  def delete(self) -> None:
    """Delete stored context from disk.

    The file is unlinked directly instead of being checked for existence
    first, so a session file that disappears concurrently is still reported
    as a missing session rather than as a generic OS error.

    Raises:
      SessionNotFoundError: If the session file doesn't exist.
      OSError: If the file exists but cannot be removed (logged, then re-raised).
    """
    try:
      self._file_path.unlink()
    except (FileNotFoundError, NotADirectoryError):
      # Nothing at that path: same outcome as a session that never existed.
      raise SessionNotFoundError(self._session_id) from None
    except OSError as e:
      log.error("context_delete_failed", path=str(self._file_path), error=str(e))
      raise
    log.debug("context_deleted", path=str(self._file_path))

  def get_statistics(self) -> ContextStatistics:
    """Summarize the in-memory context as a ContextStatistics value.

    Message and turn counts are derived from the current sequence; the tool
    call count and timestamps come from the tracked attributes.
    """
    messages = 0
    turns = 0
    # One pass over the sequence, tallying the two entry types of interest.
    for entry in self._sequence:
      kind = entry["type"]
      if kind == "message":
        messages += 1
      if kind == "turn":
        turns += 1

    return ContextStatistics(
      message_count=messages,
      turn_count=turns,
      tool_call_count=self._tool_call_count,
      start_time=self._start_time,
      last_turn_time=self._last_turn_time,
    )

  def close(self) -> None:
    """Release resources and flush any pending writes."""
    self._append_record(
      "session_end",
      {
        "end_time": datetime.now().isoformat(),
      },
    )

  # Private methods

  def _ensure_storage_directory(self) -> None:
    """Create storage directory with secure permissions if it doesn't exist."""
    if not self._storage_path.exists():
      self._storage_path.mkdir(parents=True, mode=DIR_MODE)
      log.debug("storage_created", path=str(self._storage_path))
    else:
      # Ensure correct permissions
      try:
        self._storage_path.chmod(DIR_MODE)
      except OSError:
        pass  # Ignore permission errors on existing directories

  def _append_record(self, record_type: str, data: dict[str, Any]) -> None:
    """Append a record to the JSONL file.

    Uses atomic write for crash safety.

    Args:
      record_type: Type of record (session_start, message, etc.).
      data: Record data dictionary.
    """
    self._ensure_storage_directory()

    record = {
      "type": record_type,
      "timestamp": datetime.now().isoformat(),
      "data": data,
    }

    self._atomic_write_jsonl(record)

  def _atomic_write_jsonl(self, record: dict[str, Any]) -> None:
    """Write a record atomically to the JSONL file.

    Uses file locking for atomic appends with secure permissions.

    Args:
      record: The record dictionary to write.
    """
    import fcntl

    # Ensure storage directory exists
    self._ensure_storage_directory()

    # Create file if needed with secure permissions
    if not self._file_path.exists():
      self._file_path.touch(mode=FILE_MODE)
    else:
      # Ensure permissions on existing file
      try:
        self._file_path.chmod(FILE_MODE)
      except OSError:
        pass

    # Write with file locking for atomic append
    with open(self._file_path, "a") as f:
      # Acquire exclusive lock
      fcntl.flock(f.fileno(), fcntl.LOCK_EX)
      try:
        json.dump(record, f)
        f.write("\n")
        f.flush()
        os.fsync(f.fileno())
      finally:
        fcntl.flock(f.fileno(), fcntl.LOCK_UN)

  def _write_session_start(self) -> None:
    """Write session_start record."""
    record = {
      "type": "session_start",
      "timestamp": self._start_time.isoformat(),
      "data": {
        "session_id": self._session_id,
        "start_time": self._start_time.isoformat(),
      },
    }
    self._atomic_write_jsonl(record)

  def _flush_pending_records(self) -> None:
    """Flush any pending writes to disk.

    For JSONL files, this is a no-op since records are written immediately.
    """
    # Sync file to disk
    if self._file_path.exists():
      with open(self._file_path, "a") as f:
        os.fsync(f.fileno())

  def _load_from_file(self) -> None:
    """Load context from JSONL file.

    Parses all records and reconstructs in-memory state.

    Raises:
      ContextCorruptionError: If file is corrupted.
    """
    self.clear()

    line_num = 0
    try:
      with open(self._file_path) as f:
        for line_num, line in enumerate(f, start=1):
          line = line.strip()
          if not line:
            continue

          record = json.loads(line)
          self._process_record(record, line_num)

    except json.JSONDecodeError as e:
      raise ContextCorruptionError(
        str(self._file_path),
        line_num,
        f"Invalid JSON: {e.msg}",
      ) from None

  def _process_record(self, record: dict[str, Any], line_num: int) -> None:
    """Process a single JSONL record.

    Args:
      record: The parsed record dictionary.
      line_num: Line number for error messages.

    Raises:
      ContextCorruptionError: If record is malformed.
    """
    if "type" not in record:
      raise ContextCorruptionError(
        str(self._file_path),
        line_num,
        "Missing record type",
      )

    record_type = record.get("type")
    data = record.get("data", {})

    if record_type == "session_start":
      # Already handled during initialization
      pass

    elif record_type == "message":
      if "role" not in data or "content" not in data:
        raise ContextCorruptionError(
          str(self._file_path),
          line_num,
          "Missing message fields",
        )
      self._sequence.append({"type": "message", "data": data})

    elif record_type == "tool_result":
      if "tool_id" not in data:
        raise ContextCorruptionError(
          str(self._file_path),
          line_num,
          "Missing tool_id",
        )
      self._sequence.append({"type": "tool_result", "data": data})
      self._tool_call_count += 1

    elif record_type == "tool_call_message":
      # Record containing assistant message with tool_calls
      if "tool_calls" not in data:
        raise ContextCorruptionError(
          str(self._file_path),
          line_num,
          "Missing tool_calls",
        )
      # Convert to message format
      self._sequence.append(
        {
          "type": "message",
          "data": {
            "role": "assistant",
            "tool_calls": data["tool_calls"],
            "content": "",
          },
        }
      )

    elif record_type == "turn_start":
      self._sequence.append({"type": "turn", "data": data})

    elif record_type == "turn_end":
      # Turn end - nothing special to do, statistics will be computed
      pass

    elif record_type == "session_end":
      # Session ended, nothing special to do
      pass

    else:
      log.warning(
        "unknown_record_type",
        record_type=record_type,
        line=line_num,
      )


# Public names exported by a star-import of this module.
__all__ = [
  "BasicPersistenceContextManager",
]

1	"""Basic persistence context manager implementation.
2
3	Provides JSONL-based context persistence with secure file handling.
4
5	Note:
6	File locking uses fcntl (Unix-only). On Windows, file writes
7	are still atomic but lack inter-process locking protection.
8	For production use on Windows, consider adding a cross-platform
9	file locking library.
10	"""
11
12	import json	1✔
13	import os	1✔
14	from datetime import datetime	1✔
15	from pathlib import Path	1✔
16	from typing import Any	1✔
17
18	from yoker.context.interface import ContextStatistics	1✔
19	from yoker.context.validator import is_safe_path, validate_session_id, validate_storage_path	1✔
20	from yoker.exceptions import ContextCorruptionError, SessionNotFoundError	1✔
21	from yoker.logging import get_logger	1✔
22
23	log = get_logger(__name__)	1✔
24
25	# File permissions
26	DIR_MODE = 0o700 # Owner-only for directories	1✔
27	FILE_MODE = 0o600 # Owner-only for files	1✔
28
29
30	class BasicPersistenceContextManager:	1✔
31	"""Context manager with JSONL persistence.
32
33	Stores conversation history in JSONL (JSON Lines) format with:
34	- Atomic writes for crash safety
35	- Secure file permissions
36	- Session lifecycle tracking
37
38	Record types:
39	- session_start: Session metadata
40	- message: User/assistant/system message
41	- tool_result: Tool execution result
42	- turn: Turn boundary marker
43	- session_end: Session termination marker
44	"""
45
46	def __init__(	1✔
47	self,
48	storage_path: Path \| str,
49	session_id: str = "auto",
50	) -> None:
51	"""Initialize context manager.
52
53	Args:
54	storage_path: Directory for storing context files.
55	session_id: Session ID or "auto" to generate.
56
57	Raises:
58	ValidationError: If storage_path or session_id is invalid.
59	"""
60	# Validate and resolve storage path
61	self._storage_path = validate_storage_path(Path(storage_path), "context.storage_path")	1✔
62
63	# Validate session ID
64	self._session_id = validate_session_id(session_id, "context.session_id")	1✔
65
66	# In-memory context: ordered sequence of all items
67	# Each item is {"type": "message"\|"tool_result", "data": {...}}
68	self._sequence: list[dict[str, Any]] = []	1✔
69
70	# Statistics
71	self._start_time = datetime.now()	1✔
72	self._last_turn_time: datetime \| None = None	1✔
73	self._tool_call_count = 0	1✔
74
75	# File path
76	self._file_path = self._storage_path / f"{self._session_id}.jsonl"	1✔
77
78	log.debug(	1✔
79	"context_initialized",
80	session_id=self._session_id,
81	storage_path=str(self._storage_path),
82	)
83
84	def get_session_id(self) -> str:	1✔
85	"""Get the unique session identifier."""
86	return self._session_id	1✔
87
88	def add_message(	1✔
89	self,
90	role: str,
91	content: str,
92	metadata: dict[str, Any] \| None = None,
93	thinking: str \| None = None,
94	) -> None:
95	"""Add a message to the context.
96
97	Args:
98	role: Message role ("user", "assistant", "system").
99	content: Message content.
100	metadata: Optional metadata (e.g., images, files).
101	thinking: Optional thinking/reasoning content (for assistant messages).
102	"""
103	message: dict[str, Any] = {	1✔
104	"role": role,
105	"content": content,
106	}
107	if metadata:	1✔
UNCOV 108	message["metadata"] = metadata	×
109	if thinking:	1✔
UNCOV 110	message["thinking"] = thinking	×
111
112	self._sequence.append({"type": "message", "data": message})	1✔
113	self._append_record("message", message)	1✔
114
115	def add_tool_result(	1✔
116	self,
117	tool_name: str,
118	tool_id: str,
119	result: str,
120	success: bool = True,
121	) -> None:
122	"""Add a tool execution result to the context."""
123	tool_result: dict[str, Any] = {	1✔
124	"tool_name": tool_name,
125	"tool_id": tool_id,
126	"result": result,
127	"success": success,
128	}
129
130	self._sequence.append({"type": "tool_result", "data": tool_result})	1✔
131	self._tool_call_count += 1	1✔
132	self._append_record("tool_result", tool_result)	1✔
133
134	def add_tool_calls(	1✔
135	self,
136	tool_calls: list[dict[str, Any]],
137	thinking: str \| None = None,
138	) -> None:
139	"""Add an assistant message with tool calls to the context.
140
141	This must be called BEFORE add_tool_result() for each tool call.
142	The assistant message with tool_calls is required by the LLM API
143	to understand what tools were called before receiving results.
144
145	Args:
146	tool_calls: List of tool call dictionaries with 'name' and 'arguments'.
147	thinking: Optional thinking/reasoning content from the assistant.
148	"""
149	# Store as a special assistant message with tool_calls
UNCOV 150	assistant_msg: dict[str, Any] = {	×
151	"role": "assistant",
152	"tool_calls": tool_calls,
153	"content": "", # Empty content when only tool calls
154	}
UNCOV 155	if thinking:	×
UNCOV 156	assistant_msg["thinking"] = thinking	×
157
UNCOV 158	self._sequence.append({"type": "message", "data": assistant_msg})	×
UNCOV 159	self._append_record("tool_call_message", {"tool_calls": tool_calls, "thinking": thinking})	×
160
161	def get_context(self) -> list[dict[str, Any]]:	1✔
162	"""Get the full context for backend submission.
163
164	Returns messages and tool results in the correct order for the LLM API:
165	- User messages
166	- Assistant messages (with tool_calls if present)
167	- Tool result messages (after the assistant message)
168	"""
169	context: list[dict[str, Any]] = []	1✔
170
171	for item in self._sequence:	1✔
172	if item["type"] == "message":	1✔
173	msg = item["data"]	1✔
174	# Build message dict, including tool_calls and thinking if present
175	message: dict[str, Any] = {	1✔
176	"role": msg["role"],
177	"content": msg.get("content", ""),
178	}
179	# Include tool_calls for assistant messages
180	if msg["role"] == "assistant" and "tool_calls" in msg:	1✔
UNCOV 181	message["tool_calls"] = msg["tool_calls"]	×
182	# Include thinking for assistant messages
183	if msg["role"] == "assistant" and "thinking" in msg:	1✔
UNCOV 184	message["thinking"] = msg["thinking"]	×
185	context.append(message)	1✔
186	elif item["type"] == "tool_result":	1✔
187	tr = item["data"]	1✔
188	context.append(	1✔
189	{
190	"role": "tool",
191	"name": tr["tool_name"],
192	"content": tr["result"],
193	}
194	)
195
196	return context	1✔
197
198	def get_messages(self) -> list[dict[str, Any]]:	1✔
199	"""Get all recorded messages (excludes tool results)."""
200	messages: list[dict[str, Any]] = []	1✔
201	for item in self._sequence:	1✔
202	if item["type"] == "message":	1✔
203	messages.append(item["data"])	1✔
204	return messages	1✔
205
206	def start_turn(self, user_message: str) -> None:	1✔
207	"""Start a new conversation turn."""
208	turn_record: dict[str, Any] = {	1✔
209	"user_message": user_message,
210	"start_time": datetime.now().isoformat(),
211	}
212	self._sequence.append({"type": "turn", "data": turn_record})	1✔
213	self._append_record("turn_start", turn_record)	1✔
214
215	# Add user message
216	self.add_message("user", user_message)	1✔
217
218	def end_turn(self, assistant_message: str, thinking: str \| None = None) -> None:	1✔
219	"""End the current conversation turn.
220
221	Args:
222	assistant_message: The assistant's response content.
223	thinking: Optional thinking/reasoning content from the assistant.
224	"""
225	self._last_turn_time = datetime.now()	1✔
226
227	# Add assistant message with optional thinking
228	self.add_message("assistant", assistant_message, thinking=thinking)	1✔
229
230	# Append turn end record
231	self._append_record(	1✔
232	"turn_end",
233	{
234	"assistant_message": assistant_message,
235	},
236	)
237
238	def save(self) -> None:	1✔
239	"""Persist context to storage.
240
241	Creates storage directory if needed and writes all records.
242	"""
243	self._ensure_storage_directory()	1✔
244
245	# Write session_start if file doesn't exist
246	if not self._file_path.exists():	1✔
247	self._write_session_start()	1✔
248
249	# Flush any buffered writes
250	self._flush_pending_records()	1✔
251
252	def load(self) -> bool:	1✔
253	"""Load context from storage.
254
255	Returns:
256	True if context was loaded, False if no stored context exists.
257
258	Raises:
259	ContextCorruptionError: If stored context is corrupted.
260	"""
261	if not self._file_path.exists():	1✔
262	return False	1✔
263
264	# Check if path is safe
265	if not is_safe_path(self._storage_path, self._file_path):	1✔
UNCOV 266	raise SessionNotFoundError(self._session_id)	×
267
268	try:	1✔
269	self._load_from_file()	1✔
270	return True	1✔
271	except json.JSONDecodeError as e:	1✔
UNCOV 272	raise ContextCorruptionError(	×
273	str(self._file_path),
274	e.lineno or 0,
275	f"Invalid JSON: {e.msg}",
276	) from None
277
278	def clear(self) -> None:	1✔
279	"""Clear in-memory context (does not delete from storage)."""
280	self._sequence.clear()	1✔
281	self._tool_call_count = 0	1✔
282	self._last_turn_time = None	1✔
283
284	def delete(self) -> None:	1✔
285	"""Delete stored context from disk.
286
287	Raises:
288	SessionNotFoundError: If session doesn't exist.
289	"""
290	if not self._file_path.exists():	1✔
291	raise SessionNotFoundError(self._session_id)	1✔
292
293	try:	1✔
294	self._file_path.unlink()	1✔
295	log.debug("context_deleted", path=str(self._file_path))	1✔
UNCOV 296	except OSError as e:	×
UNCOV 297	log.error("context_delete_failed", path=str(self._file_path), error=str(e))	×
UNCOV 298	raise	×
299
300	def get_statistics(self) -> ContextStatistics:	1✔
301	"""Get statistics about context usage."""
302	# Count items in sequence
303	message_count = sum(1 for item in self._sequence if item["type"] == "message")	1✔
304	turn_count = sum(1 for item in self._sequence if item["type"] == "turn")	1✔
305
306	return ContextStatistics(	1✔
307	message_count=message_count,
308	turn_count=turn_count,
309	tool_call_count=self._tool_call_count,
310	start_time=self._start_time,
311	last_turn_time=self._last_turn_time,
312	)
313
314	def close(self) -> None:	1✔
315	"""Release resources and flush any pending writes."""
316	self._append_record(	1✔
317	"session_end",
318	{
319	"end_time": datetime.now().isoformat(),
320	},
321	)
322
323	# Private methods
324
325	def _ensure_storage_directory(self) -> None:	1✔
326	"""Create storage directory with secure permissions if it doesn't exist."""
327	if not self._storage_path.exists():	1✔
328	self._storage_path.mkdir(parents=True, mode=DIR_MODE)	1✔
329	log.debug("storage_created", path=str(self._storage_path))	1✔
330	else:
331	# Ensure correct permissions
332	try:	1✔
333	self._storage_path.chmod(DIR_MODE)	1✔
UNCOV 334	except OSError:	×
UNCOV 335	pass # Ignore permission errors on existing directories	×
336
337	def _append_record(self, record_type: str, data: dict[str, Any]) -> None:	1✔
338	"""Append a record to the JSONL file.
339
340	Uses atomic write for crash safety.
341
342	Args:
343	record_type: Type of record (session_start, message, etc.).
344	data: Record data dictionary.
345	"""
346	self._ensure_storage_directory()	1✔
347
348	record = {	1✔
349	"type": record_type,
350	"timestamp": datetime.now().isoformat(),
351	"data": data,
352	}
353
354	self._atomic_write_jsonl(record)	1✔
355
356	def _atomic_write_jsonl(self, record: dict[str, Any]) -> None:	1✔
357	"""Write a record atomically to the JSONL file.
358
359	Uses file locking for atomic appends with secure permissions.
360
361	Args:
362	record: The record dictionary to write.
363	"""
364	import fcntl	1✔
365
366	# Ensure storage directory exists
367	self._ensure_storage_directory()	1✔
368
369	# Create file if needed with secure permissions
370	if not self._file_path.exists():	1✔
371	self._file_path.touch(mode=FILE_MODE)	1✔
372	else:
373	# Ensure permissions on existing file
374	try:	1✔
375	self._file_path.chmod(FILE_MODE)	1✔
UNCOV 376	except OSError:	×
UNCOV 377	pass	×
378
379	# Write with file locking for atomic append
380	with open(self._file_path, "a") as f:	1✔
381	# Acquire exclusive lock
382	fcntl.flock(f.fileno(), fcntl.LOCK_EX)	1✔
383	try:	1✔
384	json.dump(record, f)	1✔
385	f.write("\n")	1✔
386	f.flush()	1✔
387	os.fsync(f.fileno())	1✔
388	finally:
389	fcntl.flock(f.fileno(), fcntl.LOCK_UN)	1✔
390
391	def _write_session_start(self) -> None:	1✔
392	"""Write session_start record."""
393	record = {	1✔
394	"type": "session_start",
395	"timestamp": self._start_time.isoformat(),
396	"data": {
397	"session_id": self._session_id,
398	"start_time": self._start_time.isoformat(),
399	},
400	}
401	self._atomic_write_jsonl(record)	1✔
402
403	def _flush_pending_records(self) -> None:	1✔
404	"""Flush any pending writes to disk.
405
406	For JSONL files, this is a no-op since records are written immediately.
407	"""
408	# Sync file to disk
409	if self._file_path.exists():	1✔
410	with open(self._file_path, "a") as f:	1✔
411	os.fsync(f.fileno())	1✔
412
413	def _load_from_file(self) -> None:	1✔
414	"""Load context from JSONL file.
415
416	Parses all records and reconstructs in-memory state.
417
418	Raises:
419	ContextCorruptionError: If file is corrupted.
420	"""
421	self.clear()	1✔
422
423	line_num = 0	1✔
424	try:	1✔
425	with open(self._file_path) as f:	1✔
426	for line_num, line in enumerate(f, start=1):	1✔
427	line = line.strip()	1✔
428	if not line:	1✔
UNCOV 429	continue	×
430
431	record = json.loads(line)	1✔
432	self._process_record(record, line_num)	1✔
433
434	except json.JSONDecodeError as e:	1✔
435	raise ContextCorruptionError(	1✔
436	str(self._file_path),
437	line_num,
438	f"Invalid JSON: {e.msg}",
439	) from None
440
441	def _process_record(self, record: dict[str, Any], line_num: int) -> None:	1✔
442	"""Process a single JSONL record.
443
444	Args:
445	record: The parsed record dictionary.
446	line_num: Line number for error messages.
447
448	Raises:
449	ContextCorruptionError: If record is malformed.
450	"""
451	if "type" not in record:	1✔
UNCOV 452	raise ContextCorruptionError(	×
453	str(self._file_path),
454	line_num,
455	"Missing record type",
456	)
457
458	record_type = record.get("type")	1✔
459	data = record.get("data", {})	1✔
460
461	if record_type == "session_start":	1✔
462	# Already handled during initialization
463	pass	1✔
464
465	elif record_type == "message":	1✔
466	if "role" not in data or "content" not in data:	1✔
UNCOV 467	raise ContextCorruptionError(	×
468	str(self._file_path),
469	line_num,
470	"Missing message fields",
471	)
472	self._sequence.append({"type": "message", "data": data})	1✔
473
474	elif record_type == "tool_result":	1✔
UNCOV 475	if "tool_id" not in data:	×
UNCOV 476	raise ContextCorruptionError(	×
477	str(self._file_path),
478	line_num,
479	"Missing tool_id",
480	)
UNCOV 481	self._sequence.append({"type": "tool_result", "data": data})	×
UNCOV 482	self._tool_call_count += 1	×
483
484	elif record_type == "tool_call_message":	1✔
485	# Record containing assistant message with tool_calls
UNCOV 486	if "tool_calls" not in data:	×
UNCOV 487	raise ContextCorruptionError(	×
488	str(self._file_path),
489	line_num,
490	"Missing tool_calls",
491	)
492	# Convert to message format
UNCOV 493	self._sequence.append(	×
494	{
495	"type": "message",
496	"data": {
497	"role": "assistant",
498	"tool_calls": data["tool_calls"],
499	"content": "",
500	},
501	}
502	)
503
504	elif record_type == "turn_start":	1✔
UNCOV 505	self._sequence.append({"type": "turn", "data": data})	×
506
507	elif record_type == "turn_end":	1✔
508	# Turn end - nothing special to do, statistics will be computed
UNCOV 509	pass	×
510
511	elif record_type == "session_end":	1✔
512	# Session ended, nothing special to do
513	pass	1✔
514
515	else:
UNCOV 516	log.warning(	×
517	"unknown_record_type",
518	record_type=record_type,
519	line=line_num,
520	)
521
522
523	__all__ = [	1✔
524	"BasicPersistenceContextManager",
525	]

christophevg / yoker / 25117204947

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous