Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ggml/include/gguf.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ extern "C" {

GGML_API struct gguf_context * gguf_init_empty(void);
GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
//GGML_API struct gguf_context * gguf_init_from_buffer(..);
// in-memory counterpart of the file-based loader: parses the GGUF data found in the `size` bytes at `buffer`
// returns NULL when parsing fails (e.g. the buffer ends before the aligned start of the data section)
GGML_API struct gguf_context * gguf_init_from_buffer(const void * buffer, size_t size, struct gguf_init_params params);

GGML_API void gguf_free(struct gguf_context * ctx);

Expand Down
71 changes: 60 additions & 11 deletions ggml/src/gguf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,13 +217,25 @@ struct gguf_context {
};

struct gguf_reader {
// Input source of the reader. The FILE* constructor leaves the buffer fields
// at their defaults; the buffer constructor leaves `file` null. The read()
// overloads use `file` whenever it is non-null and the buffer otherwise.
FILE * file = nullptr;

// Start of the in-memory input and the number of readable bytes behind it.
const uint8_t * buffer = nullptr;
size_t buffer_size = 0;

// Read cursor into `buffer`. Declared mutable because the read() overloads
// are const member functions but still have to advance the cursor.
mutable size_t buffer_pos = 0;

// Read from an already opened stream.
gguf_reader(FILE * file) : file(file) {}

// Read from `size` bytes of memory starting at `buffer`.
gguf_reader(const void * buffer, size_t size) : buffer(static_cast<const uint8_t *>(buffer)), buffer_size(size) {}

// Reads one value of type T into `dst`.
//
// Stream mode (`file` is non-null): succeeds only if fread() delivers all
// sizeof(T) bytes.
// Buffer mode: copies sizeof(T) bytes from the cursor and advances it. When
// fewer bytes remain, returns false and leaves `dst` and the cursor untouched.
template <typename T>
bool read(T & dst) const {
    if (file) {
        return fread(&dst, 1, sizeof(dst), file) == sizeof(dst);
    }
    // compare against the remaining byte count instead of computing
    // `buffer_pos + sizeof(T)`, so the bounds check itself cannot wrap around
    if (buffer_pos > buffer_size || sizeof(T) > buffer_size - buffer_pos) {
        return false;
    }
    memcpy(&dst, buffer + buffer_pos, sizeof(T));
    buffer_pos += sizeof(T);
    return true;
}

template <typename T>
Expand Down Expand Up @@ -278,11 +290,27 @@ struct gguf_reader {
return false;
}
dst.resize(size);
return fread(dst.data(), 1, dst.length(), file) == dst.length();
if (file) {
return fread(dst.data(), 1, dst.length(), file) == dst.length();
}
if (buffer_pos + dst.length() > buffer_size) {
return false;
}
memcpy(dst.data(), buffer + buffer_pos, dst.length());
buffer_pos += dst.length();
return true;
}

// Reads `size` raw bytes into `dst`.
//
// Stream mode (`file` is non-null): succeeds only if fread() delivers all
// `size` bytes.
// Buffer mode: copies `size` bytes from the cursor and advances it. When fewer
// bytes remain, returns false and leaves `dst` and the cursor untouched.
bool read(void * dst, const size_t size) const {
    if (file) {
        return fread(dst, 1, size, file) == size;
    }
    // a very large `size` would make `buffer_pos + size` wrap around and slip
    // past the check, so compare against the remaining byte count instead
    if (buffer_pos > buffer_size || size > buffer_size - buffer_pos) {
        return false;
    }
    // zero-length reads succeed without calling memcpy, which must not be
    // handed a null pointer even when the length is 0
    if (size > 0) {
        memcpy(dst, buffer + buffer_pos, size);
        buffer_pos += size;
    }
    return true;
}
};

Expand Down Expand Up @@ -316,8 +344,7 @@ bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct
return true;
}

struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
const struct gguf_reader gr(file);
static struct gguf_context * gguf_init_from_reader_impl(const gguf_reader & gr, struct gguf_init_params params) {
struct gguf_context * ctx = new gguf_context;

bool ok = true;
Expand Down Expand Up @@ -610,14 +637,26 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
GGML_ASSERT(int64_t(ctx->info.size()) == n_tensors);

// we require the data section to be aligned, so take into account any padding
if (fseek(file, GGML_PAD(ftell(file), ctx->alignment), SEEK_SET) != 0) {
GGML_LOG_ERROR("%s: failed to seek to beginning of data section\n", __func__);
gguf_free(ctx);
return nullptr;
const size_t offset_curr = gr.file ? ftell(gr.file) : gr.buffer_pos;
const size_t offset_aligned = GGML_PAD(offset_curr, ctx->alignment);

if (gr.file) {
if (fseek(gr.file, offset_aligned, SEEK_SET) != 0) {
GGML_LOG_ERROR("%s: failed to seek to beginning of data section\n", __func__);
gguf_free(ctx);
return nullptr;
}
} else {
if (offset_aligned > gr.buffer_size) {
GGML_LOG_ERROR("%s: buffer overflow when seeking to data section\n", __func__);
gguf_free(ctx);
return nullptr;
}
gr.buffer_pos = offset_aligned;
}

// store the current file offset - this is where the data section starts
ctx->offset = ftell(file);
ctx->offset = offset_aligned;

// compute the total size of the data section, taking into account the alignment
{
Expand Down Expand Up @@ -730,6 +769,16 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
return ctx;
}

struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
const struct gguf_reader gr(file);
return gguf_init_from_reader_impl(gr, params);
}

struct gguf_context * gguf_init_from_buffer(const void * buffer, size_t size, struct gguf_init_params params) {
const struct gguf_reader gr(buffer, size);
return gguf_init_from_reader_impl(gr, params);
}

struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
FILE * file = ggml_fopen(fname, "rb");

Expand Down
5 changes: 5 additions & 0 deletions include/llama.h
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,11 @@ extern "C" {
const char * path_model,
struct llama_model_params params);

// Load the model from a memory buffer of `size` bytes instead of a file path
// NOTE(review): the definition is not visible here; the buffer-backed llama_mmap uses the
// caller's memory directly instead of copying it, so presumably `buffer` must stay valid
// for as long as the model is in use - confirm
LLAMA_API struct llama_model * llama_model_load_from_buffer(
const void * buffer,
size_t size,
struct llama_model_params params);

// Load the model from multiple splits (support custom naming scheme)
// The paths must be in the correct order
LLAMA_API struct llama_model * llama_model_load_from_splits(
Expand Down
44 changes: 44 additions & 0 deletions src/llama-mmap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,14 @@ struct llama_file::impl {
seek(0, SEEK_SET);
}

// Buffer-backed constructor: no FILE stream is associated with the object;
// `buffer` and its length `size` are recorded as the data source.
impl(const void * buffer, size_t size) : fp(nullptr), size(size), buffer(buffer) {}

size_t tell() const {
if (buffer) {
return buffer_pos;
}
// TODO: this ifdef is never true?
#ifdef _WIN32
__int64 ret = _ftelli64(fp);
Expand All @@ -184,6 +191,16 @@ struct llama_file::impl {
}

void seek(size_t offset, int whence) const {
if (buffer) {
if (whence == SEEK_SET) {
buffer_pos = offset;
} else if (whence == SEEK_CUR) {
buffer_pos += offset;
} else if (whence == SEEK_END) {
buffer_pos = size + offset;
}
return;
}
// TODO: this ifdef is never true?
#ifdef _WIN32
int ret = _fseeki64(fp, (__int64) offset, whence);
Expand All @@ -199,6 +216,14 @@ struct llama_file::impl {
if (len == 0) {
return;
}
if (buffer) {
if (buffer_pos + len > size) {
throw std::runtime_error("unexpectedly reached end of buffer");
}
memcpy(ptr, (const char *)buffer + buffer_pos, len);
buffer_pos += len;
return;
}
errno = 0;
std::size_t ret = std::fread(ptr, len, 1, fp);
if (ferror(fp)) {
Expand All @@ -219,6 +244,9 @@ struct llama_file::impl {
if (len == 0) {
return;
}
if (buffer) {
throw std::runtime_error("write to buffer not supported");
}
errno = 0;
size_t ret = std::fwrite(ptr, len, 1, fp);
if (ret != 1) {
Expand All @@ -239,13 +267,17 @@ struct llama_file::impl {

FILE * fp;
size_t size;
const void * buffer = nullptr;
mutable size_t buffer_pos = 0;
};

// File-backed construction: `fname` and `mode` are forwarded to the implementation.
llama_file::llama_file(const char * fname, const char * mode)
    : pimpl(std::make_unique<impl>(fname, mode)) {
}

// Memory-backed construction: the implementation reads from `buffer` (`size` bytes).
llama_file::llama_file(const void * buffer, size_t size)
    : pimpl(std::make_unique<impl>(buffer, size)) {
}

llama_file::~llama_file() = default;

// Current position as reported by the implementation.
size_t llama_file::tell() const {
    return pimpl->tell();
}

// Size stored by the implementation.
size_t llama_file::size() const {
    return pimpl->size;
}

// Backing memory buffer; the implementation's default for it is nullptr.
const void * llama_file::buffer() const {
    return pimpl->buffer;
}

int llama_file::file_id() const {
#ifdef _WIN32
Expand All @@ -272,9 +304,15 @@ void llama_file::write_u32(uint32_t val) const { pimpl->write_u32(val); }
struct llama_mmap::impl {
#ifdef _POSIX_MAPPED_FILES
std::vector<std::pair<size_t, size_t>> mapped_fragments;
bool is_buffer = false;

impl(struct llama_file * file, size_t prefetch, bool numa) {
size = file->size();
if (file->buffer()) {
addr = const_cast<void*>(file->buffer());
is_buffer = true;
return;
}
int fd = file->file_id();
int flags = MAP_SHARED;
if (numa) { prefetch = 0; }
Expand Down Expand Up @@ -319,6 +357,9 @@ struct llama_mmap::impl {
}

void unmap_fragment(size_t first, size_t last) {
if (is_buffer) {
return;
}
int page_size = sysconf(_SC_PAGESIZE);
align_range(&first, &last, page_size);
size_t len = last - first;
Expand Down Expand Up @@ -355,6 +396,9 @@ struct llama_mmap::impl {
}

~impl() {
if (is_buffer) {
return;
}
for (const auto & frag : mapped_fragments) {
if (munmap((char *) addr + frag.first, frag.second - frag.first)) {
LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
Expand Down
3 changes: 3 additions & 0 deletions src/llama-mmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,14 @@ using llama_mlocks = std::vector<std::unique_ptr<llama_mlock>>;

struct llama_file {
// file-backed: fname and mode are forwarded to the implementation
llama_file(const char * fname, const char * mode);
// memory-backed: reads are served from `buffer` (`size` bytes); writing to a buffer throws
llama_file(const void * buffer, size_t size);
~llama_file();

size_t tell() const; // current position (the buffer cursor when memory-backed)
size_t size() const;

const void * buffer() const; // backing memory; nullptr unless the buffer constructor was used

int file_id() const; // fileno overload

void seek(size_t offset, int whence) const;
Expand Down
Loading