• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

rendezqueue / rendezllama / 21147017983

19 Jan 2026 05:50PM UTC coverage: 90.909% (+2.9%) from 87.997%
21147017983

push

github

grencez
qual(test): inference

Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>

408 of 417 new or added lines in 5 files covered. (97.84%)

2040 of 2244 relevant lines covered (90.91%)

114.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.51
/test/language/inference_test.cc
1
#include "src/chat/display.hh"
#include "src/chat/opt.hh"
#include "src/chat/trajectory.hh"
#include "src/language/inference.hh"
#include "src/language/vocabulary.hh"

#include <cassert>
#include <iostream>
#include <set>
#include <string>
#include <tuple>
#include <vector>

#include <fildesh/fildesh.h>

#include "llama.h"
15

16
using rendezllama::ChatDisplay;
17
using rendezllama::ChatOptions;
18
using rendezllama::ChatTrajectory;
19
using rendezllama::Inference;
20
using rendezllama::Vocabulary;
21

22
static
23
  void
24
noop_log_callback(enum ggml_log_level level, const char* text, void* user_data)
173✔
25
{
26
  (void) level;
173✔
27
  (void) text;
173✔
28
  (void) user_data;
173✔
29
}
173✔
30

31
static void test_antiprompt_suffix() {
1✔
32
  std::set<std::string> antiprompts;
1✔
33
  antiprompts.insert("User:");
2✔
34
  antiprompts.insert("\nUser:");
2✔
35

36
  // Case: No match
37
  assert(rendezllama::antiprompt_suffix("Hello World", antiprompts).empty());
1✔
38

39
  // Case: Exact match
40
  assert(rendezllama::antiprompt_suffix("User:", antiprompts) == "User:");
1✔
41

42
  // Case: Suffix match
43
  assert(rendezllama::antiprompt_suffix("Hello User:", antiprompts) == "User:");
1✔
44

45
  // Case: Longest match check
46
  assert(rendezllama::antiprompt_suffix("Hello\nUser:", antiprompts) == "\nUser:");
1✔
47

48
  // Case: Partial match should fail
49
  assert(rendezllama::antiprompt_suffix("User", antiprompts).empty());
1✔
50
}
1✔
51

52
// End-to-end smoke test for Inference: loads the model at `model_filename`,
// then runs one commit+sample round per sampler configuration (temperature,
// top-k, top-p, min-p, typical-p, mirostat v2, repetition penalties, DRY,
// XTC). Success means no crash or failed assert, not output quality.
static void inference_test(const std::string& model_filename) {
  // Silence llama.cpp's default logging for the whole test run.
  llama_log_set(noop_log_callback, NULL);

  ChatOptions opt;
  opt.model_filename = model_filename;
  // Initialize infer_via with default Sampling to avoid assertion failure in Inference::reinitialize.
  opt.infer_via.emplace<rendezllama::inference::Sampling>();

  struct llama_model* model = nullptr;
  struct llama_context* ctx = nullptr;
  std::tie(model, ctx) = rendezllama::make_llama_context(opt);
  assert(model);
  assert(ctx);

  Vocabulary vocabulary(model);
  Inference inference(vocabulary);
  // Trajectory starts from the model's beginning-of-sequence token.
  ChatTrajectory chat_traj(vocabulary.bos_token_id());
  ChatDisplay chat_disp;
  // Use /dev/null for display to avoid spamming test output, we check tokens programmatically.
  chat_disp.out_ = open_FildeshOF("/dev/null");

  // Add a simple prompt to start generation.
  chat_traj.tokenize_append("Once upon a time", vocabulary);
  chat_disp.show_new(chat_traj, vocabulary);

  using rendezllama::inference::AdjustViaKind;
  using rendezllama::inference::Sampling;

  // One Sampling per sampler knob; each gets its own loop iteration below.
  std::vector<Sampling> samplings;
  // Temperature
  {
    Sampling s;
    s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::temperature>, 0.8f);
    samplings.push_back(s);
  }
  // Top K
  {
    Sampling s;
    s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::top_k>, 40u);
    samplings.push_back(s);
  }
  // Top P
  {
    Sampling s;
    s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::top_p>, 0.9f);
    samplings.push_back(s);
  }
  // Min P
  {
    Sampling s;
    s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::min_p>, 0.05f);
    samplings.push_back(s);
  }
  // Typical P
  {
    Sampling s;
    s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::typical_p>, 0.9f);
    samplings.push_back(s);
  }
  // Mirostat V2
  {
    Sampling s;
    s.pick_via = rendezllama::inference::Mirostat{2, 5.0f, 0.1f};
    samplings.push_back(s);
  }
  // Penalties
  {
    Sampling s;
    rendezllama::inference::PenalizeWith p;
    p.window_length = 5;
    p.repetition = 1.1f;
    s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::penalize_with>, p);
    samplings.push_back(s);
  }
  // Dry
  {
    Sampling s;
    rendezllama::inference::Dry d;
    d.multiplier = 0.8f;
    d.base = 1.75f;
    d.allowed_length = 2;
    d.window_length = 0; // Default.
    s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::dry>, d);
    samplings.push_back(s);
  }
  // XTC
  {
    Sampling s;
    rendezllama::inference::Xtc x;
    x.threshold = 0.1f;
    x.probability = 0.5f;
    s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::xtc>, x);
    samplings.push_back(s);
  }

  // Iterate through different sampling options.
  // NOTE(review): a commit_to_context() failure ends the loop silently,
  // leaving the remaining sampler configurations untested — confirm whether
  // this should assert instead.
  for (const auto& sampling : samplings) {
    opt.infer_via = sampling;
    if (!inference.commit_to_context(ctx, chat_disp, chat_traj, opt, model)) {
      break;
    }
    inference.sample_to_trajectory(chat_traj, ctx, false);
    chat_disp.show_new(chat_traj, vocabulary);
  }

  llama_free(ctx);
  llama_model_free(model);
}
160

161
// Usage: inference_test <model_filename>
int main(int argc, char** argv)
{
  // RAII guard — presumably performs llama.cpp global init/teardown for the
  // process; exact effects are defined in the project headers. TODO confirm.
  rendezllama::GlobalScope rendezllama_global_scope;
  // Exactly one argument (the model file) is required. With NDEBUG this
  // check compiles away, so keep test builds assert-enabled.
  assert(argc == 2);
  test_antiprompt_suffix();
  inference_test(argv[1]);
  return 0;
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc