• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

yunnysunny / bookforge / 23728902738

30 Mar 2026 05:07AM UTC coverage: 69.753%. First build
23728902738

Pull #3

github

cursoragent
style: format onnx embedding helper

Co-authored-by: gaoyang2024 <gaoyang2024@users.noreply.github.com>
Pull Request #3: Cursor/ bc a79f8a8c ff83 498c a86e 8c1ae44409a8 cf0a

65 of 85 branches covered (76.47%)

Branch coverage included in aggregate %.

29 of 51 new or added lines in 3 files covered. (56.86%)

387 of 563 relevant lines covered (68.74%)

27.17 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

58.25
/src/core/markdown-parser.ts
1
// Markdown parser
2

3
import { copyFile } from 'node:fs/promises';
4
import { marked, type Token, type Tokens, Marked } from 'marked';
5
import { basename, dirname, extname, join } from 'node:path';
6
import type { Env, Heading, MarkdownFile } from '../types/index.js';
7
import { generateIdFromText, isMarkdownFile, mkdirAsync, readFile } from '../utils';
8
import { gitbookExtension } from './marked-plugins/gitbook.plugin.js';
9
import { katexExtension } from './marked-plugins/katex.plugin.js';
10
import { gitbookTabExtension } from './marked-plugins/gitbook-tab.plugin.js';
11
import { gitbookStepperExtension } from './marked-plugins/gitbook-stepper.plugin.js';
12
import {
13
  gitbookIncludeExtension,
14
  type GitbookIncludeToken,
15
  IncludeTokenType,
16
} from './marked-plugins/gitbook-include.plugin.js';
17

18
// Shared renderer instance handed to every `Marked` created by MarkdownParser.
const renderer = new marked.Renderer();

/**
 * Custom heading renderer: emits `<hN id="...">` with an empty anchor link in
 * front of the heading content so headings can be linked to directly.
 *
 * The id is derived from the heading's full source text (the same input that
 * `MarkdownParser.extractHeadings` feeds to `generateIdFromText`), so anchors
 * in a generated outline line up with the rendered ids. The visible content is
 * produced by rendering *all* inline tokens; the previous implementation only
 * emitted the first inline token's text, which truncated headings containing
 * inline markup (code spans, emphasis, links) and threw on empty headings.
 */
renderer.heading = ({ tokens, depth, text }: Tokens.Heading) => {
  const id = generateIdFromText(text);
  // `renderer.parser` is assigned by marked's Parser before any render call.
  const content = renderer.parser.parseInline(tokens);
  return `<h${depth} id="${id}">
  <a href="#${id}" class="anchor"></a>
  ${content}
</h${depth}>`;
};
27

28
/** Options accepted by the `MarkdownParser` constructor. */
export interface MarkdownParserOptions {
29
  /**
   * Target output environment. `toHtml` inlines local images as base64
   * data URIs when this equals `'pdf'`.
   */
  env: Env;
30
}
31
/** Per-call options for `MarkdownParser.toHtml`. */
export interface ToHTMLOptions {
32
  /**
   * Path of the markdown file being rendered; relative images, links and
   * include paths are resolved against its directory.
   */
  contentPath: string;
33
  /** Directory that referenced local resources are copied into. */
  destDir: string;
34
}
35
/**
 * Reads markdown files, extracts their heading outline, and renders markdown
 * to HTML through a `Marked` instance configured with the GitBook, tab,
 * stepper, KaTeX and include extensions.
 */
export class MarkdownParser {
  /** YAML front matter block; only recognised at the very start of the text. */
  private static readonly FRONT_MATTER = /^---[ \t]*\n(?:[\s\S]*?\n)?---[ \t]*(?:\n|$)/;

  /**
   * Hrefs that do not point at a local file: anything with a URL scheme
   * (`https:`, `mailto:`, `data:`, `blob:` ...), protocol-relative URLs and
   * in-page anchors. The scheme needs at least two characters so a Windows
   * drive letter is not mistaken for one.
   */
  private static readonly NON_LOCAL_HREF = /^(?:[a-z][a-z0-9+.-]+:|\/\/|#)/i;

  /** File extensions whose media type is not simply `image/<ext>`. */
  private static readonly IMAGE_MIME_BY_EXT: Readonly<Record<string, string>> = {
    jpg: 'image/jpeg',
    jpeg: 'image/jpeg',
    svg: 'image/svg+xml',
  };

  private marked: Marked;
  private readonly env: Env;

  /**
   * @param options - carries the target environment used later by `toHtml`.
   */
  constructor(options: MarkdownParserOptions) {
    this.env = options.env;
    this.marked = new Marked();
    this.marked.setOptions({
      gfm: true,
      breaks: true,
      renderer,
    });
    this.marked.use(gitbookExtension);
    this.marked.use(gitbookTabExtension);
    this.marked.use(gitbookStepperExtension);
    this.marked.use(katexExtension);
    this.marked.use(gitbookIncludeExtension);
  }

  /**
   * Parse a markdown file.
   *
   * @param filePath - path of the markdown file to read.
   * @returns the path, raw content, heading tree and derived title.
   */
  async parseFile(filePath: string): Promise<MarkdownFile> {
    const content = await readFile(filePath);
    const headings = this.extractHeadings(content);
    const title = this.extractTitle(content, headings);

    return {
      path: filePath,
      title,
      content,
      headings,
    };
  }

  /**
   * Reduce markdown to a single line of plain text by stripping front matter,
   * fenced code blocks, and inline/link/image/blockquote/heading/emphasis/table
   * markup, then collapsing all whitespace.
   *
   * @param content - raw markdown.
   * @returns whitespace-normalised plain text (empty string for empty input).
   */
  public toPlainText(content: string): string {
    return (
      content
        .replace(/\r\n?/g, '\n')
        // Front matter is only valid at the start of the document. Matching it
        // on any line would swallow everything between two horizontal rules.
        // It is removed outright (not replaced by a space) so that a heading or
        // blockquote marker on the following line still sits at line start.
        .replace(MarkdownParser.FRONT_MATTER, '')
        .replace(/```[\s\S]*?```/g, ' ')
        .replace(/`([^`]+)`/g, '$1')
        // Images before links: an image is a link pattern prefixed with `!`.
        .replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1')
        .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
        .replace(/^>\s?/gm, '')
        .replace(/^#{1,6}\s+/gm, '')
        .replace(/[*_~]/g, ' ')
        .replace(/\|/g, ' ')
        .replace(/\n+/g, ' ')
        .replace(/\s+/g, ' ')
        .trim()
    );
  }

  /**
   * Extract the heading structure as a tree: each heading becomes a child of
   * the nearest preceding heading with a smaller level.
   *
   * Lines inside fenced code blocks are skipped, so `# comment` lines in shell
   * or Python snippets are not mistaken for headings.
   */
  private extractHeadings(content: string): Heading[] {
    const lines = content.replace(/\r\n?/g, '\n').split('\n');
    const headings: Heading[] = [];
    const stack: Heading[] = [];
    // Marker of the currently open code fence, or null when outside a fence.
    let openFence: string | null = null;

    for (const rawLine of lines) {
      const line = rawLine.trim();

      if (openFence !== null) {
        // A fence closes on a line made only of the same fence character,
        // at least as long as the opening marker.
        const closing = line.match(/^(`{3,}|~{3,})$/);
        if (closing && closing[1][0] === openFence[0] && closing[1].length >= openFence.length) {
          openFence = null;
        }
        continue;
      }

      // A backtick fence's info string cannot itself contain backticks.
      const opening = line.match(/^(?:(`{3,})[^`]*|(~{3,}).*)$/);
      if (opening) {
        openFence = opening[1] ?? opening[2];
        continue;
      }

      const match = line.match(/^(#{1,6})\s+(.+)$/);
      if (!match) {
        continue;
      }

      const level = match[1].length;
      const text = match[2].trim();
      const heading: Heading = {
        level,
        text,
        id: generateIdFromText(text),
        children: [],
      };

      // Find the parent: pop everything at the same or a deeper level.
      while (stack.length > 0 && stack[stack.length - 1].level >= level) {
        stack.pop();
      }

      if (stack.length === 0) {
        headings.push(heading);
      } else {
        stack[stack.length - 1].children.push(heading);
      }

      stack.push(heading);
    }

    return headings;
  }

  /**
   * Derive the document title from the top-level entries of the heading tree.
   *
   * @param _content - unused; kept for signature stability.
   * @param headings - top-level headings as returned by `extractHeadings`.
   */
  private extractTitle(_content: string, headings: Heading[]): string {
    // Prefer the first level-1 heading.
    const firstH1 = headings.find((h) => h.level === 1);
    if (firstH1) {
      return firstH1.text;
    }

    // Without a level-1 heading, use the first heading.
    if (headings.length > 0) {
      return headings[0].text;
    }

    // No headings at all: fall back to a fixed placeholder.
    return 'Untitled';
  }

  /**
   * Copy a resource referenced relative to the markdown file into the output
   * directory, preserving its relative path.
   *
   * @param src - URL-encoded relative path as written in the markdown.
   * @returns the destination path of the copied file.
   */
  private async copyResource(src: string, options: ToHTMLOptions) {
    let decodedSrc: string;
    try {
      decodedSrc = decodeURIComponent(src);
    } catch {
      // A stray '%' is not valid percent-encoding; use the path as written.
      decodedSrc = src;
    }
    const fromPath = join(dirname(options.contentPath), decodedSrc);
    const toPath = join(options.destDir, decodedSrc);
    await mkdirAsync(dirname(toPath));
    await copyFile(fromPath, toPath);
    return toPath;
  }

  // /**
  //  * Parse links inside table cells
  //  */
  // private parseTableCellLinks(cell: Tokens.TableCell): void {
  //   // If the cell content is plain text in link syntax, parse it as a link
  //   if (cell.tokens.length === 1 && cell.tokens[0].type === 'text') {
  //     const textToken = cell.tokens[0] as Tokens.Text;
  //     const linkMatch = textToken.text.match(/^\[([^\]]+)\]\(([^)]+)\)$/);
  //     if (linkMatch) {
  //       let href = linkMatch[2];
  //       if (isMarkdownFile(href)) {
  //         const path = dirname(href);
  //         const filename = basename(href, extname(href));
  //         href = path === '.' ? `./${filename}.html` : `${path}/${filename}.html`;
  //       }
  //       // Replace the text token with a link token
  //       const linkToken: Tokens.Link = {
  //         type: 'link',
  //         raw: textToken.raw,
  //         href,
  //         title: null,
  //         text: linkMatch[1],
  //         tokens: [
  //           {
  //             type: 'text',
  //             raw: linkMatch[1],
  //             text: linkMatch[1],
  //           },
  //         ],
  //       };
  //       cell.tokens = [linkToken];
  //     }
  //   }
  // }

  /**
   * Convert markdown to HTML.
   *
   * While walking the tokens this also:
   * - copies locally referenced images and non-markdown link targets into
   *   `options.destDir` (and, for the `'pdf'` environment, inlines images as
   *   base64 data URIs);
   * - rewrites links to markdown files so they point at the `.html` output,
   *   keeping any `?query` / `#fragment` suffix;
   * - turns `mermaid` code blocks into `<pre class="mermaid">` and labels
   *   language-less code blocks as `plain`;
   * - replaces include tokens with the rendered HTML of the included file.
   *
   * @param content - markdown source.
   * @param options - location of the source file and the output directory.
   * @returns the rendered HTML.
   * @throws when a referenced local resource or included file cannot be read
   *   or copied.
   */
  async toHtml(content: string, options: ToHTMLOptions): Promise<string> {
    const html = await this.marked.parse(content, {
      async: true,
      walkTokens: async (token: Token) => {
        if (token.type === 'image') {
          const src = token.href;
          // Remote, embedded and anchor sources need no copying.
          if (!src || MarkdownParser.NON_LOCAL_HREF.test(src)) {
            return;
          }
          const imageToPath = await this.copyResource(src, options);
          if (this.env === 'pdf') {
            const buffer = await readFile(imageToPath, 'base64');
            const ext = extname(imageToPath).slice(1).toLowerCase();
            // `image/jpg` and `image/svg` are not valid media types.
            const mime = MarkdownParser.IMAGE_MIME_BY_EXT[ext] ?? `image/${ext}`;
            token.href = `data:${mime};base64,${buffer}`;
          }
        } else if (token.type === 'link') {
          const href = token.href;
          // External URLs, mailto:/tel: links and in-page anchors are left
          // untouched; treating them as files would make the copy fail.
          if (!href || MarkdownParser.NON_LOCAL_HREF.test(href)) {
            return;
          }
          // Separate the file path from an optional ?query / #fragment suffix.
          const suffixStart = href.search(/[?#]/);
          const target = suffixStart === -1 ? href : href.slice(0, suffixStart);
          const suffix = suffixStart === -1 ? '' : href.slice(suffixStart);
          if (!target) {
            return;
          }
          if (!isMarkdownFile(target)) {
            await this.copyResource(target, options);
            return;
          }
          const dir = dirname(target);
          const filename = basename(target, extname(target));
          // Keep the link explicitly relative when the target is in the same directory.
          const link = dir === '.' ? `./${filename}.html` : `${dir}/${filename}.html`;
          token.href = `${link}${suffix}`;
        } else if (token.type === 'code') {
          // Handle mermaid code blocks.
          const codeToken = token as Tokens.Code;
          if (codeToken.lang === 'mermaid') {
            const diagram = codeToken.text;
            token.type = 'html';
            token.text = `<pre class="mermaid">
${diagram}
</pre>`;
          } else if (!codeToken.lang) {
            // No language given: mark the block as plain text.
            codeToken.lang = 'plain';
          }
        } else if (token.type === IncludeTokenType) {
          const includeToken = token as GitbookIncludeToken;
          const includeContent = await readFile(
            join(dirname(options.contentPath), includeToken.path),
          );
          token.type = 'html';
          // NOTE(review): the included file is rendered with the parent's
          // `contentPath`, so relative resources inside it resolve against the
          // parent's directory, and cyclic includes are not guarded against.
          // Confirm whether that is intended.
          token.text = await this.toHtml(includeContent, options);
        }
      },
    });
    return html;
  }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc