C API Reference
Overview
libvroom provides a C API wrapper for integration with C projects and FFI bindings to other languages (Python, Rust, Go, etc.). The C API mirrors the C++ functionality with opaque handles and explicit memory management.
Include the header:
#include <libvroom_c.h>
Quick Start
#include <libvroom_c.h>
#include <stdio.h>
// Quick-start driver: load the CSV named on the command line, parse it with
// dialect auto-detection, print the detection summary and any collected
// errors, then release every handle.
// Returns 0 when parsing reports LIBVROOM_OK, 1 otherwise.
int main(int argc, char* argv[]) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <file.csv>\n", argv[0]);
        return 1;
    }

    // Load file
    libvroom_buffer_t* buffer = libvroom_buffer_load_file(argv[1]);
    if (!buffer) {
        fprintf(stderr, "Failed to load file\n");
        return 1;
    }

    // Create components
    libvroom_parser_t* parser = libvroom_parser_create();
    libvroom_error_collector_t* errors = libvroom_error_collector_create(
        LIBVROOM_MODE_PERMISSIVE, 100);
    libvroom_index_t* index = libvroom_index_create(
        libvroom_buffer_length(buffer),
        libvroom_recommended_threads());
    libvroom_detection_result_t* detection = NULL;
    int exit_code = 1;

    // NOTE(review): the create functions are assumed to return NULL on
    // failure, like libvroom_buffer_load_file() - confirm in libvroom_c.h.
    if (!parser || !errors || !index) {
        fprintf(stderr, "Failed to create parser components\n");
    } else {
        // Parse with auto-detection
        libvroom_error_t err = libvroom_parse_auto(
            parser, buffer, index, errors, &detection);

        if (err != LIBVROOM_OK) {
            fprintf(stderr, "Parse failed: %s\n", libvroom_error_string(err));
        } else {
            exit_code = 0;
            printf("Parsed successfully!\n");
            // The detection details are only printed when a result was returned.
            if (detection) {
                printf("Columns: %zu\n", libvroom_detection_result_columns(detection));
                printf("Has header: %s\n",
                       libvroom_detection_result_has_header(detection) ? "yes" : "no");
                printf("Confidence: %.2f\n", libvroom_detection_result_confidence(detection));
            }
        }

        // Check for errors
        if (libvroom_error_collector_has_errors(errors)) {
            size_t count = libvroom_error_collector_count(errors);
            printf("Warnings/errors: %zu\n", count);
            for (size_t i = 0; i < count; i++) {
                libvroom_parse_error_t error;
                if (libvroom_error_collector_get(errors, i, &error) == LIBVROOM_OK) {
                    printf("Line %zu: %s\n", error.line, error.message);
                }
            }
        }
    }

    // Cleanup: only destroy the handles that were actually created.
    if (detection) {
        libvroom_detection_result_destroy(detection);
    }
    if (index) {
        libvroom_index_destroy(index);
    }
    if (errors) {
        libvroom_error_collector_destroy(errors);
    }
    if (parser) {
        libvroom_parser_destroy(parser);
    }
    libvroom_buffer_destroy(buffer);
    return exit_code;
}
Version Information
// Get version string (e.g., "0.1.0")
const char* version = libvroom_version();
// Version macros
#define LIBVROOM_VERSION_MAJOR 0
#define LIBVROOM_VERSION_MINOR 1
#define LIBVROOM_VERSION_PATCH 0
Error Handling
Error Codes
// Status codes used by the C API: returned by calls such as
// libvroom_parse_auto() and libvroom_error_collector_get(), and stored in the
// `code` field of libvroom_parse_error_t. LIBVROOM_OK (0) is the success value
// the examples compare against.
typedef enum libvroom_error {
LIBVROOM_OK = 0,
LIBVROOM_ERROR_UNCLOSED_QUOTE = 1,
LIBVROOM_ERROR_INVALID_QUOTE_ESCAPE = 2,
LIBVROOM_ERROR_QUOTE_IN_UNQUOTED = 3,
LIBVROOM_ERROR_INCONSISTENT_FIELDS = 4,
LIBVROOM_ERROR_FIELD_TOO_LARGE = 5,
LIBVROOM_ERROR_MIXED_LINE_ENDINGS = 6,
// NOTE(review): value 7 is not assigned in this listing - confirm against libvroom_c.h.
LIBVROOM_ERROR_INVALID_UTF8 = 8,
LIBVROOM_ERROR_NULL_BYTE = 9,
LIBVROOM_ERROR_EMPTY_HEADER = 10,
LIBVROOM_ERROR_DUPLICATE_COLUMNS = 11,
LIBVROOM_ERROR_AMBIGUOUS_SEPARATOR = 12,
LIBVROOM_ERROR_FILE_TOO_LARGE = 13,
LIBVROOM_ERROR_IO = 14,
LIBVROOM_ERROR_INTERNAL = 15,
// Codes from 100 upward form a separate numeric range; presumably they report
// misuse of the C API itself rather than problems in the CSV data - TODO confirm.
LIBVROOM_ERROR_NULL_POINTER = 100,
LIBVROOM_ERROR_INVALID_ARGUMENT = 101,
LIBVROOM_ERROR_OUT_OF_MEMORY = 102,
LIBVROOM_ERROR_INVALID_HANDLE = 103
} libvroom_error_t;
// Get human-readable error message
const char* libvroom_error_string(libvroom_error_t error);
Severity Levels
typedef enum libvroom_severity {
LIBVROOM_SEVERITY_WARNING = 0, // Non-fatal, parser continues
LIBVROOM_SEVERITY_ERROR = 1, // Recoverable, can skip row
LIBVROOM_SEVERITY_FATAL = 2 // Unrecoverable, parsing must stop
} libvroom_severity_t;
Error Modes
typedef enum libvroom_error_mode {
LIBVROOM_MODE_STRICT = 0, // Stop on first error
LIBVROOM_MODE_PERMISSIVE = 1, // Collect all errors, try to recover
LIBVROOM_MODE_BEST_EFFORT = 2 // Ignore errors, parse what's possible
} libvroom_error_mode_t;
Buffer Management
Loading Data
// Load from file
libvroom_buffer_t* buffer = libvroom_buffer_load_file("data.csv");
// Create from memory
const uint8_t* data = ...;
size_t length = ...;
libvroom_buffer_t* buffer = libvroom_buffer_create(data, length);
Accessing Buffer Data
// Get data pointer
const uint8_t* data = libvroom_buffer_data(buffer);
// Get length
size_t length = libvroom_buffer_length(buffer);
Cleanup
libvroom_buffer_destroy(buffer);
Encoding Detection and Transcoding
libvroom automatically detects and handles different file encodings.
Supported Encodings
typedef enum libvroom_encoding {
LIBVROOM_ENCODING_UTF8 = 0, // UTF-8 (default)
LIBVROOM_ENCODING_UTF8_BOM = 1, // UTF-8 with BOM
LIBVROOM_ENCODING_UTF16_LE = 2, // UTF-16 Little Endian
LIBVROOM_ENCODING_UTF16_BE = 3, // UTF-16 Big Endian
LIBVROOM_ENCODING_UTF32_LE = 4, // UTF-32 Little Endian
LIBVROOM_ENCODING_UTF32_BE = 5, // UTF-32 Big Endian
LIBVROOM_ENCODING_LATIN1 = 6, // Latin-1 (ISO-8859-1)
LIBVROOM_ENCODING_UNKNOWN = 7 // Unknown encoding
} libvroom_encoding_t;
// Get encoding name
const char* name = libvroom_encoding_string(encoding);
Loading with Auto-Transcoding
// Load file with automatic encoding detection and transcoding
libvroom_load_result_t* result = libvroom_load_file_with_encoding("data.csv");
if (result) {
// Get the UTF-8 data
const uint8_t* data = libvroom_load_result_data(result);
size_t length = libvroom_load_result_length(result);
// Get encoding info
libvroom_encoding_t encoding = libvroom_load_result_encoding(result);
double confidence = libvroom_load_result_confidence(result);
bool transcoded = libvroom_load_result_was_transcoded(result);
printf("Encoding: %s\n", libvroom_encoding_string(encoding));
printf("Confidence: %.2f\n", confidence);
printf("Was transcoded: %s\n", transcoded ? "yes" : "no");
// Create buffer for parsing
libvroom_buffer_t* buffer = libvroom_load_result_to_buffer(result);
// Parse...
libvroom_buffer_destroy(buffer);
libvroom_load_result_destroy(result);
}
Manual Encoding Detection
libvroom_encoding_result_t encoding_result;
libvroom_error_t err = libvroom_detect_encoding(data, length, &encoding_result);
if (err == LIBVROOM_OK) {
printf("Encoding: %s\n", libvroom_encoding_string(encoding_result.encoding));
printf("BOM length: %zu\n", encoding_result.bom_length);
printf("Confidence: %.2f\n", encoding_result.confidence);
printf("Needs transcoding: %s\n", encoding_result.needs_transcoding ? "yes" : "no");
}
Dialect Configuration
Creating a Dialect
// Create custom dialect
libvroom_dialect_t* dialect = libvroom_dialect_create(
';', // delimiter
'"', // quote_char
'\0', // escape_char (unused with double_quote=true)
true // double_quote (escape quotes as "")
);
Accessing Dialect Properties
char delim = libvroom_dialect_delimiter(dialect);
char quote = libvroom_dialect_quote_char(dialect);
char escape = libvroom_dialect_escape_char(dialect);
bool dbl_quote = libvroom_dialect_double_quote(dialect);
Cleanup
libvroom_dialect_destroy(dialect);
Error Collector
Creating an Error Collector
// Create collector with mode and max error limit
libvroom_error_collector_t* errors = libvroom_error_collector_create(
LIBVROOM_MODE_PERMISSIVE, // error mode
1000 // max errors to collect
);
Checking Errors
// Check if any errors occurred
if (libvroom_error_collector_has_errors(errors)) {
// Check for fatal errors
if (libvroom_error_collector_has_fatal(errors)) {
fprintf(stderr, "Fatal error occurred\n");
}
// Get error count
size_t count = libvroom_error_collector_count(errors);
// Get each error
for (size_t i = 0; i < count; i++) {
libvroom_parse_error_t error;
if (libvroom_error_collector_get(errors, i, &error) == LIBVROOM_OK) {
printf("Error at line %zu, column %zu: %s\n",
error.line, error.column, error.message);
}
}
}
Error Structure
typedef struct libvroom_parse_error {
libvroom_error_t code; // Error code
libvroom_severity_t severity; // Warning, error, or fatal
size_t line; // 1-based line number
size_t column; // 1-based column number
size_t byte_offset; // Byte position in file
const char* message; // Error message
const char* context; // Context around error
} libvroom_parse_error_t;
Important: The message and context pointers are valid only until the error collector is destroyed or cleared. Copy strings if you need to persist them.
Cleanup
libvroom_error_collector_clear(errors); // Clear errors
libvroom_error_collector_destroy(errors); // Free collector
Index Structure
The index stores field boundary positions from parsing.
Creating an Index
libvroom_index_t* index = libvroom_index_create(
buffer_length, // Size of data
libvroom_recommended_threads() // Number of threads
);
Accessing Index Data
// Number of threads used
size_t threads = libvroom_index_num_threads(index);
// Number of columns detected
size_t columns = libvroom_index_columns(index);
// Index count per thread
uint64_t count_t0 = libvroom_index_count(index, 0);
// Total index count
uint64_t total = libvroom_index_total_count(index);
// Raw positions array
const uint64_t* positions = libvroom_index_positions(index);
Cleanup
libvroom_index_destroy(index);
Parser
Creating a Parser
libvroom_parser_t* parser = libvroom_parser_create();
Parsing with Known Dialect
libvroom_dialect_t* dialect = libvroom_dialect_create(',', '"', '\0', true);
libvroom_error_t err = libvroom_parse(
parser,
buffer,
index,
errors,
dialect // Can be NULL for auto-detection
);
libvroom_dialect_destroy(dialect);
Parsing with Auto-Detection
libvroom_detection_result_t* detection = NULL;
libvroom_error_t err = libvroom_parse_auto(
parser,
buffer,
index,
errors,
&detection // Receives detection result
);
if (detection) {
// Use detection results...
libvroom_detection_result_destroy(detection);
}
Cleanup
libvroom_parser_destroy(parser);
Dialect Detection
Detecting Without Parsing
libvroom_detection_result_t* detection = libvroom_detect_dialect(buffer);
if (detection && libvroom_detection_result_success(detection)) {
// Get detected dialect
libvroom_dialect_t* dialect = libvroom_detection_result_dialect(detection);
// Get detection info
size_t columns = libvroom_detection_result_columns(detection);
size_t rows_analyzed = libvroom_detection_result_rows_analyzed(detection);
bool has_header = libvroom_detection_result_has_header(detection);
double confidence = libvroom_detection_result_confidence(detection);
// Check for warnings
const char* warning = libvroom_detection_result_warning(detection);
if (warning) {
printf("Warning: %s\n", warning);
}
libvroom_dialect_destroy(dialect);
}
libvroom_detection_result_destroy(detection);
Utility Functions
// Get recommended thread count for this system
size_t threads = libvroom_recommended_threads();
// Get required SIMD padding size
size_t padding = libvroom_simd_padding();
Complete Example: Parse and Iterate
#include <libvroom_c.h>
#include <stdio.h>
#include <string.h>
// Print the detection summary for a successful parse, followed by the first
// few field offsets stored in the index.
static void print_parse_summary(const char* filename,
                                libvroom_load_result_t* load_result,
                                libvroom_index_t* index,
                                libvroom_detection_result_t* detection) {
    size_t num_columns = libvroom_detection_result_columns(detection);
    uint64_t total_fields = libvroom_index_total_count(index);
    // Guard the division: a zero column count would otherwise divide by zero.
    size_t num_rows = (num_columns > 0) ? (size_t)(total_fields / num_columns) : 0;

    printf("File: %s\n", filename);
    printf("Encoding: %s\n",
           libvroom_encoding_string(libvroom_load_result_encoding(load_result)));
    printf("Columns: %zu\n", num_columns);
    printf("Rows: %zu (including header)\n", num_rows);
    printf("Confidence: %.2f%%\n",
           libvroom_detection_result_confidence(detection) * 100);

    // Access field positions
    const uint64_t* positions = libvroom_index_positions(index);
    size_t threads = libvroom_index_num_threads(index);

    // Print first few field offsets
    printf("First 10 field offsets: ");
    // threads is used as a divisor below, so skip the loop when it is zero.
    if (positions && threads > 0) {
        for (size_t i = 0; i < 10 && i < total_fields; i++) {
            // Interleaved by thread.
            // NOTE(review): this assumes each thread's entries start at a
            // stride of (total_fields / threads + 1) - verify against the
            // index layout documented in libvroom_c.h.
            size_t thread = i % threads;
            size_t local_idx = i / threads;
            size_t actual_idx = thread * (total_fields / threads + 1) + local_idx;
            if (actual_idx < total_fields) {
                printf("%llu ", (unsigned long long)positions[actual_idx]);
            }
        }
    }
    printf("\n");
}

// Print at most the first five collected parse errors, then how many more
// were collected beyond those.
static void print_parse_errors(libvroom_error_collector_t* errors) {
    const size_t max_shown = 5;

    if (!libvroom_error_collector_has_errors(errors)) {
        return;
    }
    printf("Parse errors:\n");
    size_t count = libvroom_error_collector_count(errors);
    for (size_t i = 0; i < count && i < max_shown; i++) {
        libvroom_parse_error_t error;
        // `error` is only filled in when the call succeeds; never print it otherwise.
        if (libvroom_error_collector_get(errors, i, &error) != LIBVROOM_OK) {
            continue;
        }
        printf(" Line %zu: %s\n", error.line, error.message);
    }
    if (count > max_shown) {
        printf(" ... and %zu more\n", count - max_shown);
    }
}

// Load `filename` with encoding detection, parse it with dialect
// auto-detection and print a summary plus any collected parse errors.
// Returns 0 when the file was loaded and parsed with LIBVROOM_OK and a
// detection result was produced, 1 otherwise. Every handle created here is
// destroyed before returning.
int process_csv(const char* filename) {
    // Load with encoding detection
    libvroom_load_result_t* load_result = libvroom_load_file_with_encoding(filename);
    if (!load_result) {
        fprintf(stderr, "Failed to load file\n");
        return 1;
    }

    int status = 1;
    libvroom_error_t err = LIBVROOM_OK;
    libvroom_parser_t* parser = NULL;
    libvroom_error_collector_t* errors = NULL;
    libvroom_index_t* index = NULL;
    libvroom_detection_result_t* detection = NULL;

    // Create buffer from load result
    libvroom_buffer_t* buffer = libvroom_load_result_to_buffer(load_result);
    if (!buffer) {
        fprintf(stderr, "Failed to create buffer\n");
        goto cleanup;
    }

    // Create parser components
    // NOTE(review): the create functions are assumed to return NULL on
    // failure, like the load functions - confirm in libvroom_c.h.
    parser = libvroom_parser_create();
    errors = libvroom_error_collector_create(LIBVROOM_MODE_PERMISSIVE, 100);
    index = libvroom_index_create(
        libvroom_buffer_length(buffer), libvroom_recommended_threads());
    if (!parser || !errors || !index) {
        fprintf(stderr, "Failed to create parser components\n");
        goto cleanup;
    }

    // Parse with auto-detection
    err = libvroom_parse_auto(parser, buffer, index, errors, &detection);
    if (err != LIBVROOM_OK) {
        fprintf(stderr, "Parse failed: %s\n", libvroom_error_string(err));
    } else if (!detection) {
        fprintf(stderr, "Parse succeeded but no detection result was returned\n");
    } else {
        print_parse_summary(filename, load_result, index, detection);
        status = 0;
    }

    // Check for errors
    print_parse_errors(errors);

cleanup:
    // Destroy only what was created, in reverse order of creation.
    if (detection) {
        libvroom_detection_result_destroy(detection);
    }
    if (index) {
        libvroom_index_destroy(index);
    }
    if (errors) {
        libvroom_error_collector_destroy(errors);
    }
    if (parser) {
        libvroom_parser_destroy(parser);
    }
    if (buffer) {
        libvroom_buffer_destroy(buffer);
    }
    libvroom_load_result_destroy(load_result);
    return status;
}

// Entry point: print the library version, process the CSV named in argv[1]
// and propagate its status as the process exit code.
int main(int argc, char* argv[]) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <file.csv>\n", argv[0]);
        return 1;
    }
    printf("libvroom version: %s\n\n", libvroom_version());
    return process_csv(argv[1]);
}
Building with the C API
CMake
# Using FetchContent
include(FetchContent)
FetchContent_Declare(libvroom
GIT_REPOSITORY https://github.com/jimhester/libvroom.git
GIT_TAG main
)
set(BUILD_TESTING OFF CACHE BOOL "" FORCE)
set(BUILD_BENCHMARKS OFF CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(libvroom)
add_executable(my_c_app main.c)
target_link_libraries(my_c_app PRIVATE libvroom_lib)
Manual Compilation
gcc -std=c11 main.c -I/path/to/include -L/path/to/lib -lvroom -lstdc++ -lm -o my_app
Thread Safety
- Parser objects are NOT thread-safe. Create one parser per thread.
- Buffer objects can be shared for reading across threads.
- Index objects should not be shared during parsing.
- Error collectors are NOT thread-safe.
Memory Management Rules
- Every `*_create()` or `*_load*()` function returns an allocated object that must be freed with the corresponding `*_destroy()` function.
- Never free pointers returned by accessor functions (e.g., `libvroom_buffer_data()`).
- Strings in `libvroom_parse_error_t` are valid only until the error collector is modified or destroyed.
- Detection result dialect must be freed separately with `libvroom_dialect_destroy()`.
See Also
- Getting Started - Basic usage with C++ API
- Integration Guide - CMake integration options
- Error Handling - Error modes and recovery