21157153067

Committed 20 Jan 2026 02:13AM UTC coverage: 90.342% (+0.4%) from 89.931%

Build # 21157153067

Build Type

push

github

Committed by

grencez

Commit Message

Update localserv to support chat interface and CLI args

Run Details

2142 of 2371 relevant lines covered (90.34%)

280.81 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.68

/test/language/inference_test.cc

#include "src/chat/display.hh"
#include "src/chat/opt.hh"
#include "src/chat/trajectory.hh"
#include "src/language/inference.hh"
#include "src/language/vocabulary.hh"

#include <cassert>
#include <iostream>
#include <tuple>
#include <vector>

#include <fildesh/fildesh.h>

#include "llama.h"

using rendezllama::ChatDisplay;
using rendezllama::ChatOptions;
using rendezllama::ChatTrajectory;
using rendezllama::Inference;
using rendezllama::Vocabulary;

/// Log callback that discards every message it is given.
///
/// inference_test() installs it through llama_log_set().
/// All three arguments are ignored, so they are left unnamed.
static void
noop_log_callback(enum ggml_log_level /*level*/, const char* /*text*/, void* /*user_data*/)
{
  // Intentionally empty.
}

static void test_antiprompt_suffix() {
  std::set<std::string> antiprompts;
  antiprompts.insert("User:");
  antiprompts.insert("\nUser:");

  // Case: No match
  assert(rendezllama::antiprompt_suffix("Hello World", antiprompts).empty());

  // Case: Exact match
  assert(rendezllama::antiprompt_suffix("User:", antiprompts) == "User:");

  // Case: Suffix match
  assert(rendezllama::antiprompt_suffix("Hello User:", antiprompts) == "User:");

  // Case: Longest match check
  assert(rendezllama::antiprompt_suffix("Hello\nUser:", antiprompts) == "\nUser:");

  // Case: Partial match should fail
  assert(rendezllama::antiprompt_suffix("User", antiprompts).empty());
}

static void inference_test(const std::string& model_filename) {
  llama_log_set(noop_log_callback, NULL);

  ChatOptions opt;
  opt.model_filename = model_filename;
  // Initialize infer_via with default Sampling to avoid assertion failure in Inference::reinitialize.
  opt.infer_via.emplace<rendezllama::inference::Sampling>();

  struct llama_model* model = nullptr;
  struct llama_context* ctx = nullptr;
  std::tie(model, ctx) = rendezllama::make_llama_context(opt);
  assert(model);
  assert(ctx);

  Vocabulary vocabulary(model);
  Inference inference(vocabulary);
  ChatTrajectory chat_traj(vocabulary.bos_token_id());
  ChatDisplay chat_disp;
  // Use /dev/null for display to avoid spamming test output, we check tokens programmatically.
  chat_disp.out_ = open_FildeshOF("/dev/null");

  // Add a simple prompt to start generation.
  chat_traj.tokenize_append("Once upon a time", vocabulary);
  chat_disp.show_new(chat_traj, vocabulary);

  using rendezllama::inference::AdjustViaKind;
  using rendezllama::inference::Sampling;

  std::vector<Sampling> samplings;
  // Temperature
  {
    Sampling s;
    s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::temperature>, 0.8f);
    samplings.push_back(s);
  }
  // Top K
  {
    Sampling s;
    s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::top_k>, 40u);
    samplings.push_back(s);
  }
  // Top P
  {
    Sampling s;
    s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::top_p>, 0.9f);
    samplings.push_back(s);
  }
  // Min P
  {
    Sampling s;
    s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::min_p>, 0.05f);
    samplings.push_back(s);
  }
   // Typical P
  {
    Sampling s;
    s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::typical_p>, 0.9f);
    samplings.push_back(s);
  }
  // Greedy
  {
    Sampling s;
    s.pick_via = rendezllama::inference::Determinism{};
    samplings.push_back(s);
  }
  // Adaptive P
  {
    Sampling s;
    s.pick_via = rendezllama::inference::AdaptiveP{0.55f, 0.9f};
    samplings.push_back(s);
  }
  // Mirostat V2
  {
    Sampling s;
    s.pick_via = rendezllama::inference::Mirostat{2, 5.0f, 0.1f};
    samplings.push_back(s);
  }
  // Penalties
  {
    Sampling s;
    rendezllama::inference::PenalizeWith p;
    p.window_length = 5;
    p.repetition = 1.1f;
    s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::penalize_with>, p);
    samplings.push_back(s);
  }
  // Dry
  {
    Sampling s;
    rendezllama::inference::Dry d;
    d.multiplier = 0.8f;
    d.base = 1.75f;
    d.allowed_length = 2;
    d.window_length = 0; // Default.
    s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::dry>, d);
    samplings.push_back(s);
  }
  // XTC
  {
    Sampling s;
    rendezllama::inference::Xtc x;
    x.threshold = 0.1f;
    x.probability = 0.5f;
    s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::xtc>, x);
    samplings.push_back(s);
  }

  // Iterate through different sampling options.
  bool all_good = true;
  for (const auto& sampling : samplings) {
    opt.infer_via = sampling;
    if (!inference.commit_to_context(ctx, chat_disp, chat_traj, opt, model)) {
      all_good = false;
      break;
    }
    inference.sample_to_trajectory(chat_traj, ctx, false);
    chat_disp.show_new(chat_traj, vocabulary);
  }
  assert(all_good);

  llama_free(ctx);
  llama_model_free(model);
}

/// Test entry point.
///
/// Expects exactly one command-line argument: the model file path, which is
/// forwarded to inference_test().
///
/// @return 0 when the tests ran; 1 (after a usage message on stderr) when the
///         argument count is wrong.
int main(int argc, char** argv)
{
  // Check explicitly instead of with assert(): when NDEBUG is defined the
  // assert disappears, and argv[1] would then be converted to a std::string
  // even when it is a null pointer (argc == 1), which is undefined behavior.
  if (argc != 2) {
    std::cerr << "Usage: " << (argc > 0 ? argv[0] : "inference_test")
              << " MODEL_FILE\n";
    return 1;
  }

  rendezllama::GlobalScope rendezllama_global_scope;
  test_antiprompt_suffix();
  inference_test(argv[1]);
  return 0;
}

1	#include "src/chat/display.hh"
2	#include "src/chat/opt.hh"
3	#include "src/chat/trajectory.hh"
4	#include "src/language/inference.hh"
5	#include "src/language/vocabulary.hh"
6
7	#include <cassert>
8	#include <iostream>
9	#include <tuple>
10	#include <vector>
11
12	#include <fildesh/fildesh.h>
13
14	#include "llama.h"
15
16	using rendezllama::ChatDisplay;
17	using rendezllama::ChatOptions;
18	using rendezllama::ChatTrajectory;
19	using rendezllama::Inference;
20	using rendezllama::Vocabulary;
21
22	static
23	void
24	noop_log_callback(enum ggml_log_level level, const char* text, void* user_data)	184✔
25	{
26	(void) level;	184✔
27	(void) text;	184✔
28	(void) user_data;	184✔
29	}	184✔
30
31	static void test_antiprompt_suffix() {	1✔
32	std::set<std::string> antiprompts;	1✔
33	antiprompts.insert("User:");	2✔
34	antiprompts.insert("\nUser:");	2✔
35
36	// Case: No match
37	assert(rendezllama::antiprompt_suffix("Hello World", antiprompts).empty());	1✔
38
39	// Case: Exact match
40	assert(rendezllama::antiprompt_suffix("User:", antiprompts) == "User:");	1✔
41
42	// Case: Suffix match
43	assert(rendezllama::antiprompt_suffix("Hello User:", antiprompts) == "User:");	1✔
44
45	// Case: Longest match check
46	assert(rendezllama::antiprompt_suffix("Hello\nUser:", antiprompts) == "\nUser:");	1✔
47
48	// Case: Partial match should fail
49	assert(rendezllama::antiprompt_suffix("User", antiprompts).empty());	1✔
50	}	1✔
51
52	static void inference_test(const std::string& model_filename) {	1✔
53	llama_log_set(noop_log_callback, NULL);	1✔
54
55	ChatOptions opt;	1✔
56	opt.model_filename = model_filename;	1✔
57	// Initialize infer_via with default Sampling to avoid assertion failure in Inference::reinitialize.
58	opt.infer_via.emplace<rendezllama::inference::Sampling>();	1✔
59
60	struct llama_model* model = nullptr;	1✔
61	struct llama_context* ctx = nullptr;	1✔
62	std::tie(model, ctx) = rendezllama::make_llama_context(opt);	1✔
63	assert(model);	1✔
64	assert(ctx);	1✔
65
66	Vocabulary vocabulary(model);	1✔
67	Inference inference(vocabulary);	1✔
68	ChatTrajectory chat_traj(vocabulary.bos_token_id());	1✔
69	ChatDisplay chat_disp;	1✔
70	// Use /dev/null for display to avoid spamming test output, we check tokens programmatically.
71	chat_disp.out_ = open_FildeshOF("/dev/null");	1✔
72
73	// Add a simple prompt to start generation.
74	chat_traj.tokenize_append("Once upon a time", vocabulary);	1✔
75	chat_disp.show_new(chat_traj, vocabulary);	1✔
76
77	using rendezllama::inference::AdjustViaKind;	1✔
78	using rendezllama::inference::Sampling;	1✔
79
80	std::vector<Sampling> samplings;	1✔
81	// Temperature
82	{	1✔
83	Sampling s;	1✔
84	s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::temperature>, 0.8f);	1✔
85	samplings.push_back(s);	1✔
86	}	×
87	// Top K
88	{	1✔
89	Sampling s;	1✔
90	s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::top_k>, 40u);	1✔
91	samplings.push_back(s);	1✔
92	}	×
93	// Top P
94	{	1✔
95	Sampling s;	1✔
96	s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::top_p>, 0.9f);	1✔
97	samplings.push_back(s);	1✔
98	}	×
99	// Min P
100	{	1✔
101	Sampling s;	1✔
102	s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::min_p>, 0.05f);	1✔
103	samplings.push_back(s);	1✔
104	}	×
105	// Typical P
106	{	1✔
107	Sampling s;	1✔
108	s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::typical_p>, 0.9f);	1✔
109	samplings.push_back(s);	1✔
110	}	×
111	// Greedy
112	{	1✔
113	Sampling s;	1✔
114	s.pick_via = rendezllama::inference::Determinism{};	1✔
115	samplings.push_back(s);	1✔
116	}	×
117	// Adaptive P
118	{	1✔
119	Sampling s;	1✔
120	s.pick_via = rendezllama::inference::AdaptiveP{0.55f, 0.9f};	1✔
121	samplings.push_back(s);	1✔
122	}	×
123	// Mirostat V2
124	{	1✔
125	Sampling s;	1✔
126	s.pick_via = rendezllama::inference::Mirostat{2, 5.0f, 0.1f};	1✔
127	samplings.push_back(s);	1✔
128	}	×
129	// Penalties
130	{	1✔
131	Sampling s;	1✔
132	rendezllama::inference::PenalizeWith p;	1✔
133	p.window_length = 5;	1✔
134	p.repetition = 1.1f;	1✔
135	s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::penalize_with>, p);	1✔
136	samplings.push_back(s);	1✔
137	}	×
138	// Dry
139	{	1✔
140	Sampling s;	1✔
141	rendezllama::inference::Dry d;	1✔
142	d.multiplier = 0.8f;	1✔
143	d.base = 1.75f;	1✔
144	d.allowed_length = 2;	1✔
145	d.window_length = 0; // Default.	1✔
146	s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::dry>, d);	1✔
147	samplings.push_back(s);	1✔
148	}	×
149	// XTC
150	{	1✔
151	Sampling s;	1✔
152	rendezllama::inference::Xtc x;	1✔
153	x.threshold = 0.1f;	1✔
154	x.probability = 0.5f;	1✔
155	s.adjust_thru.emplace_back(std::in_place_index<AdjustViaKind::xtc>, x);	1✔
156	samplings.push_back(s);	1✔
157	}	×
158
159	// Iterate through different sampling options.
160	bool all_good = true;	1✔
161	for (const auto& sampling : samplings) {	12✔
162	opt.infer_via = sampling;	11✔
163	if (!inference.commit_to_context(ctx, chat_disp, chat_traj, opt, model)) {	11✔
164	all_good = false;
165	break;
166	}
167	inference.sample_to_trajectory(chat_traj, ctx, false);	11✔
168	chat_disp.show_new(chat_traj, vocabulary);	11✔
169	}
170	assert(all_good);	1✔
171
172	llama_free(ctx);	1✔
173	llama_model_free(model);	1✔
174	}	1✔
175
176	int main(int argc, char** argv)	1✔
177	{
178	rendezllama::GlobalScope rendezllama_global_scope;	1✔
179	assert(argc == 2);	1✔
180	test_antiprompt_suffix();	1✔
181	inference_test(argv[1]);	2✔
182	return 0;	1✔
183	}	1✔

rendezqueue / rendezllama / 21157153067

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous