We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Hi, I'm trying to use the C binding in SPDK. Before doing that, I'm testing that the binding works on its own. Here is the code I use:
`#include <stdio.h> #include <stdlib.h> #include "tokenizers_c.h" #include <string.h> #include <assert.h>
/*
 * Returns the size of `file` in bytes and leaves the stream positioned at
 * its beginning.
 *
 * The size is measured by seeking to the end of the stream and reading the
 * offset with ftell(). Returns 0 when `file` is NULL or when seeking or
 * telling fails, so a caller never receives ftell()'s -1 error value
 * converted to a huge size_t.
 */
size_t file_length(FILE *file){
    assert(file);
    /* assert() is compiled out under NDEBUG, so guard explicitly as well. */
    if (!file) {
        return 0;
    }
    if (fseek(file, 0, SEEK_END) != 0) {
        rewind(file);
        return 0;
    }
    /* ftell() returns long and reports failure as -1L. */
    long end_offset = ftell(file);
    rewind(file);
    if (end_offset < 0) {
        return 0;
    }
    return (size_t)end_offset;
}
/*
 * Reads the whole file at `filename` into a newly malloc()ed buffer.
 *
 * On success returns the buffer, with a '\0' written one byte past the
 * file content, and stores the content length (terminator excluded) in
 * `*out_size`. The caller releases the buffer with free().
 * On failure prints a message to stdout, leaves `*out_size` at 0 and
 * returns NULL.
 */
char* read_file(const char* filename, size_t* out_size) {
    if (!filename || !out_size) {
        printf("Error: Invalid argument to read_file\n");
        return NULL;
    }
    *out_size = 0;

    /* Binary mode keeps the number of bytes fread() delivers equal to the
     * size measured by seeking, on platforms that translate line endings. */
    FILE* file = fopen(filename, "rb");
    if (!file) {
        printf("Error: Cannot open file: %s\n", filename);
        return NULL;
    }

    size_t length = file_length(file);
    /* (size_t)-1 is what a failed ftell() looks like after conversion to
     * size_t; it would also make `length + 1` wrap around to 0. */
    if (length == (size_t)-1) {
        printf("Error: Cannot determine size of file: %s\n", filename);
        fclose(file);
        return NULL;
    }

    char* buffer = (char*)malloc(length + 1);
    if (!buffer) {
        printf("Memory allocation failed!\n");
        fclose(file);
        return NULL;
    }

    size_t read_bytes = fread(buffer, 1, length, file);
    fclose(file);
    if (read_bytes != length) {
        printf("Error: File read mismatch (%zu != %zu)\n", read_bytes, length);
        free(buffer);
        return NULL;
    }

    buffer[length] = '\0'; /* terminate so the content is usable as a C string */
    *out_size = length;
    return buffer;
}
int main() { // size_t vocab_len, merge_len; char* vocab = read_file("./tokenizer_files/vocab.json", &vocab_len); char* merges = read_file("./tokenizer_files/merges.json", &merge_len);
if (!vocab || !merges) { printf("Error: Failed to load vocab or merges file.\n"); return 1; } printf("vocab.json & merges.json successfully loaded! (Vocab: %zu bytes, Merges: %zu bytes)\n", vocab_len, merge_len); // TokenizerHandle tokenizer = byte_level_bpe_tokenizers_new_from_str(vocab, vocab_len, merges, merge_len, NULL, 0); free(vocab); free(merges); if (!tokenizer) { printf("Tokenizer creation failed!\n"); return 1; } printf("Tokenizer successfully created!\n"); // const char* test_sentence = "Hello, this is a BPE tokenizer test!"; TokenizerEncodeResult result; tokenizers_encode(tokenizer, test_sentence, strlen(test_sentence), 1, &result); if (!result.token_ids || result.len == 0) { printf("❌ Tokenization failed!\n"); tokenizers_free(tokenizer); return 1; } // printf("Tokenized: "); for (size_t i = 0; i < result.len; i++) { printf("%d ", result.token_ids[i]); } printf("\n"); // tokenizers_free_encode_results(&result, 1); tokenizers_free(tokenizer); printf(" Tokenizer cleanup completed.\n"); return 0;
}`
However, the following error occurs: thread '<unnamed>' panicked at src/lib.rs:38:75: calledResult::unwrap()on anErrvalue: Error("EOF while parsing a value", line: 1, column: 0) note: run withRUST_BACKTRACE=1` environment variable to display a backtrace thread '' panicked at core/src/panicking.rs:221:5: panic in a function that cannot unwind stack backtrace: 0: 0x7fae7cd9f1fa - std::backtrace_rs::backtrace::libunwind::trace::h5a5b8284f2d0c266 at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/../../backtrace/src/backtrace/libunwind.rs:116:5 1: 0x7fae7cd9f1fa - std::backtrace_rs::backtrace::trace_unsynchronized::h76d4f1c9b0b875e3 at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5 2: 0x7fae7cd9f1fa - std::sys::backtrace::_print_fmt::hc4546b8364a537c6 at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/sys/backtrace.rs:66:9 3: 0x7fae7cd9f1fa - <std::sys::backtrace::BacktraceLock::print::DisplayBacktrace as core::fmt::Display>::fmt::h5b6bd5631a6d1f6b at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/sys/backtrace.rs:39:26 4: 0x7fae7cded593 - core::fmt::rt::Argument::fmt::h270f6602a2b96f62 at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/fmt/rt.rs:177:76 5: 0x7fae7cded593 - core::fmt::write::h7550c97b06c86515 at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/fmt/mod.rs:1186:21 6: 0x7fae7cd935c3 - std::io::Write::write_fmt::h7b09c64fe0be9c84 at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/io/mod.rs:1839:15 7: 0x7fae7cd9f042 - std::sys::backtrace::BacktraceLock::print::h2395ccd2c84ba3aa at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/sys/backtrace.rs:42:9 8: 0x7fae7cda164a - std::panicking::default_hook::{{closure}}::he19d4c7230e07961 at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/panicking.rs:268:22 9: 0x7fae7cda1490 - std::panicking::default_hook::hf614597d3c67bbdb at 
/rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/panicking.rs:295:9 10: 0x7fae7cda1c87 - std::panicking::rust_panic_with_hook::h8942133a8b252070 at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/panicking.rs:801:13 11: 0x7fae7cda1ae6 - std::panicking::begin_panic_handler::{{closure}}::hb5f5963570096b29 at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/panicking.rs:667:13 12: 0x7fae7cd9f6d9 - std::sys::backtrace::__rust_end_short_backtrace::h6208cedc1922feda at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/sys/backtrace.rs:170:18 13: 0x7fae7cda17ac - rust_begin_unwind at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/panicking.rs:665:5 14: 0x7fae7caa82dd - core::panicking::panic_nounwind_fmt::runtime::h1f507a806003dfb2 at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/panicking.rs:112:18 15: 0x7fae7caa82dd - core::panicking::panic_nounwind_fmt::h357fc035dc231634 at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/panicking.rs:122:5 16: 0x7fae7caa8372 - core::panicking::panic_nounwind::hd0dad372654c389a at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/panicking.rs:221:5 17: 0x7fae7caa8536 - core::panicking::panic_cannot_unwind::h65aefd062253eb19 at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/panicking.rs:310:5 18: 0x7fae7cab1cbb - byte_level_bpe_tokenizers_new_from_str 19: 0x55d8368165ad - main 20: 0x7fae7c63e083 - __libc_start_main at /build/glibc-FcRMwW/glibc-2.31/csu/../csu/libc-start.c:308:16 21: 0x55d8368162ae - _start 22: 0x0 - thread caused non-unwinding panic. aborting.
thread '<unnamed>' panicked at src/lib.rs:38:75: called `Result::unwrap()` on an `Err` value: Error("EOF while parsing a value", line: 1, column: 0) note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
` I would like to see more examples of how to use the C header, or to get advice on this error. Thank you.
The text was updated successfully, but these errors were encountered:
No branches or pull requests
Hi, I'm trying to use the C binding in SPDK.
Before doing that, I'm testing that the binding works on its own.
Here is the code I use:
`#include <stdio.h>
#include <stdlib.h>
#include "tokenizers_c.h"
#include <string.h>
#include <assert.h>
/*
 * Returns the size of `file` in bytes and leaves the stream positioned at
 * its beginning.
 *
 * The size is measured by seeking to the end of the stream and reading the
 * offset with ftell(). Returns 0 when `file` is NULL or when seeking or
 * telling fails, so a caller never receives ftell()'s -1 error value
 * converted to a huge size_t.
 */
size_t file_length(FILE *file){
    assert(file);
    /* assert() is compiled out under NDEBUG, so guard explicitly as well. */
    if (!file) {
        return 0;
    }
    if (fseek(file, 0, SEEK_END) != 0) {
        rewind(file);
        return 0;
    }
    /* ftell() returns long and reports failure as -1L. */
    long end_offset = ftell(file);
    rewind(file);
    if (end_offset < 0) {
        return 0;
    }
    return (size_t)end_offset;
}
/*
 * Reads the whole file at `filename` into a newly malloc()ed buffer.
 *
 * On success returns the buffer, with a '\0' written one byte past the
 * file content, and stores the content length (terminator excluded) in
 * `*out_size`. The caller releases the buffer with free().
 * On failure prints a message to stdout, leaves `*out_size` at 0 and
 * returns NULL.
 */
char* read_file(const char* filename, size_t* out_size) {
    if (!filename || !out_size) {
        printf("Error: Invalid argument to read_file\n");
        return NULL;
    }
    *out_size = 0;

    /* Binary mode keeps the number of bytes fread() delivers equal to the
     * size measured by seeking, on platforms that translate line endings. */
    FILE* file = fopen(filename, "rb");
    if (!file) {
        printf("Error: Cannot open file: %s\n", filename);
        return NULL;
    }

    size_t length = file_length(file);
    /* (size_t)-1 is what a failed ftell() looks like after conversion to
     * size_t; it would also make `length + 1` wrap around to 0. */
    if (length == (size_t)-1) {
        printf("Error: Cannot determine size of file: %s\n", filename);
        fclose(file);
        return NULL;
    }

    char* buffer = (char*)malloc(length + 1);
    if (!buffer) {
        printf("Memory allocation failed!\n");
        fclose(file);
        return NULL;
    }

    size_t read_bytes = fread(buffer, 1, length, file);
    fclose(file);
    if (read_bytes != length) {
        printf("Error: File read mismatch (%zu != %zu)\n", read_bytes, length);
        free(buffer);
        return NULL;
    }

    buffer[length] = '\0'; /* terminate so the content is usable as a C string */
    *out_size = length;
    return buffer;
}
/*
 * Loads the vocabulary and merges files into memory, reports their sizes
 * and releases them again.
 * Returns 0 when both files were read and 1 otherwise.
 */
int main() {
    /* Lengths start at 0 so they are defined even if a read fails. */
    size_t vocab_len = 0, merge_len = 0;
    char* vocab = read_file("./tokenizer_files/vocab.json", &vocab_len);
    char* merges = read_file("./tokenizer_files/merges.json", &merge_len);

    if (!vocab || !merges) {
        printf("Error: Failed to load vocab or merges file.\n");
        /* free(NULL) is a no-op, so this releases whichever file did load. */
        free(vocab);
        free(merges);
        return 1;
    }
    printf("vocab.json & merges.json successfully loaded! (Vocab: %zu bytes, Merges: %zu bytes)\n", vocab_len, merge_len);

    free(vocab);
    free(merges);
    return 0;
}`
However, the following error occurs:
thread '<unnamed>' panicked at src/lib.rs:38:75:
called `Result::unwrap()` on an `Err` value: Error("EOF while parsing a value", line: 1, column: 0)
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
thread '<unnamed>' panicked at core/src/panicking.rs:221:5:
panic in a function that cannot unwind
stack backtrace:
0: 0x7fae7cd9f1fa - std::backtrace_rs::backtrace::libunwind::trace::h5a5b8284f2d0c266
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/../../backtrace/src/backtrace/libunwind.rs:116:5
1: 0x7fae7cd9f1fa - std::backtrace_rs::backtrace::trace_unsynchronized::h76d4f1c9b0b875e3
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5
2: 0x7fae7cd9f1fa - std::sys::backtrace::_print_fmt::hc4546b8364a537c6
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/sys/backtrace.rs:66:9
3: 0x7fae7cd9f1fa - <std::sys::backtrace::BacktraceLock::print::DisplayBacktrace as core::fmt::Display>::fmt::h5b6bd5631a6d1f6b
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/sys/backtrace.rs:39:26
4: 0x7fae7cded593 - core::fmt::rt::Argument::fmt::h270f6602a2b96f62
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/fmt/rt.rs:177:76
5: 0x7fae7cded593 - core::fmt::write::h7550c97b06c86515
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/fmt/mod.rs:1186:21
6: 0x7fae7cd935c3 - std::io::Write::write_fmt::h7b09c64fe0be9c84
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/io/mod.rs:1839:15
7: 0x7fae7cd9f042 - std::sys::backtrace::BacktraceLock::print::h2395ccd2c84ba3aa
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/sys/backtrace.rs:42:9
8: 0x7fae7cda164a - std::panicking::default_hook::{{closure}}::he19d4c7230e07961
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/panicking.rs:268:22
9: 0x7fae7cda1490 - std::panicking::default_hook::hf614597d3c67bbdb
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/panicking.rs:295:9
10: 0x7fae7cda1c87 - std::panicking::rust_panic_with_hook::h8942133a8b252070
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/panicking.rs:801:13
11: 0x7fae7cda1ae6 - std::panicking::begin_panic_handler::{{closure}}::hb5f5963570096b29
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/panicking.rs:667:13
12: 0x7fae7cd9f6d9 - std::sys::backtrace::__rust_end_short_backtrace::h6208cedc1922feda
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/sys/backtrace.rs:170:18
13: 0x7fae7cda17ac - rust_begin_unwind
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/std/src/panicking.rs:665:5
14: 0x7fae7caa82dd - core::panicking::panic_nounwind_fmt::runtime::h1f507a806003dfb2
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/panicking.rs:112:18
15: 0x7fae7caa82dd - core::panicking::panic_nounwind_fmt::h357fc035dc231634
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/panicking.rs:122:5
16: 0x7fae7caa8372 - core::panicking::panic_nounwind::hd0dad372654c389a
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/panicking.rs:221:5
17: 0x7fae7caa8536 - core::panicking::panic_cannot_unwind::h65aefd062253eb19
at /rustc/90b35a6239c3d8bdabc530a6a0816f7ff89a0aaf/library/core/src/panicking.rs:310:5
18: 0x7fae7cab1cbb - byte_level_bpe_tokenizers_new_from_str
19: 0x55d8368165ad - main
20: 0x7fae7c63e083 - __libc_start_main
at /build/glibc-FcRMwW/glibc-2.31/csu/../csu/libc-start.c:308:16
21: 0x55d8368162ae - _start
22: 0x0 -
thread caused non-unwinding panic. aborting.
`
I would like to see more examples of how to use the C header, or to get advice on this error. Thank you.
The text was updated successfully, but these errors were encountered: