rpc : use ggml_context_ptr (llama/12938)
ggml/src/ggml-rpc/ggml-rpc.cpp  CHANGED  (+23 -22)
@@ -1,6 +1,7 @@
 #include "ggml-rpc.h"
 #include "ggml-impl.h"
 #include "ggml-backend-impl.h"
+#include "ggml-cpp.h"
 
 #include <cinttypes>
 #include <string>
@@ -853,12 +854,13 @@ bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_
         /*.no_alloc   =*/ true,
     };
 
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
 
     if (tensor == nullptr) {
         GGML_LOG_ERROR("Null tensor pointer passed to server get_alloc_size function.\n");
-        ggml_free(ctx);
         return false;
     }
 
@@ -871,7 +873,6 @@ bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_
 
     response.alloc_size = ggml_backend_buft_get_alloc_size(buft,tensor);
 
-    ggml_free(ctx);
     return true;
 }
 
@@ -985,11 +986,12 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor);
     if (tensor == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
-        ggml_free(ctx);
         return false;
     }
     GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu\n", __func__, (void*)tensor->buffer, tensor->data, offset, size);
@@ -1016,7 +1018,6 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
         printf("[%s] saved to '%s'\n", __func__, cache_file.c_str());
     }
     ggml_backend_tensor_set(tensor, data, offset, size);
-    ggml_free(ctx);
     return true;
 }
 
@@ -1060,11 +1061,12 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor);
     if (tensor == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
-        ggml_free(ctx);
         return false;
     }
     GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu, hash: %" PRIx64 "\n", __func__, (void*)tensor->buffer, tensor->data, offset, size, *hash);
@@ -1080,7 +1082,6 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
     }
     ggml_backend_tensor_set(tensor, cached_file.data(), offset, size);
     response.result = 1;
-    ggml_free(ctx);
     return true;
 }
 
@@ -1090,11 +1091,12 @@ bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) {
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
     if (tensor == nullptr) {
         GGML_LOG_ERROR("Null tensor pointer passed to server init_tensor function.\n");
-        ggml_free(ctx);
         return false;
     }
 
@@ -1110,11 +1112,9 @@ bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) {
         // This pointer can either be passed around client/server, or probably better stored server-side and kept track of.
         // Currently unimplemented.
         GGML_LOG_ERROR("tensor->extra populated by the backend, this is currently unsupported.\n");
-        ggml_free(ctx);
         return false;
     }
 
-    ggml_free(ctx);
     return true;
 }
 
@@ -1124,11 +1124,12 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
     if (tensor == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
-        ggml_free(ctx);
         return false;
     }
     GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %" PRIu64 "\n", __func__, (void*)tensor->buffer, tensor->data, request.offset, request.size);
@@ -1147,7 +1148,6 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<
 
     response.resize(request.size, 0);
     ggml_backend_tensor_get(tensor, response.data(), request.offset, request.size);
-    ggml_free(ctx);
     return true;
 }
 
@@ -1157,12 +1157,14 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
+
     ggml_tensor * src = deserialize_tensor(ctx, &request.src);
     ggml_tensor * dst = deserialize_tensor(ctx, &request.dst);
     if (src == nullptr || dst == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensors\n", __func__);
-        ggml_free(ctx);
         return false;
     }
 
@@ -1180,7 +1182,6 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
                        dst_data + src_size,
                        dst_base,
                        dst_base + dst_buf_sz);
-        ggml_free(ctx);
         return false;
     }
 
@@ -1188,7 +1189,6 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
                     __func__, (void*) src->buffer, (void*) dst->buffer);
 
     response.result = ggml_backend_buffer_copy_tensor(src, dst);
-    ggml_free(ctx);
     return true;
 }
 
@@ -1242,7 +1242,9 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     struct ggml_cgraph * graph = ggml_new_graph_custom(ctx, n_nodes, false);
     graph->n_nodes = n_nodes;
     std::unordered_map<uint64_t, const rpc_tensor*> tensor_ptrs;
@@ -1257,7 +1259,6 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph
     }
     ggml_status status = ggml_backend_graph_compute(backend, graph);
     response.result = status;
-    ggml_free(ctx);
     return true;
 }
 
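What the change does: each RPC handler used to allocate a ggml_context with ggml_init(params) and had to call ggml_free(ctx) on every return path. The commit switches to ggml_context_ptr from ggml-cpp.h, which is essentially a std::unique_ptr over ggml_context with a deleter that calls ggml_free, so the context is released automatically on scope exit. Below is a minimal sketch of the pattern, not actual rpc_server code: handle_request_example() and its body are hypothetical, for illustration only.

// Sketch only: illustrates the commit's RAII pattern under the assumption that
// ggml-cpp.h defines ggml_context_ptr roughly as
// std::unique_ptr<ggml_context, deleter-that-calls-ggml_free>.
#include "ggml.h"
#include "ggml-cpp.h"

static bool handle_request_example() {
    struct ggml_init_params params {
        /*.mem_size   =*/ ggml_tensor_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };

    // Before this change: ggml_context * ctx = ggml_init(params); plus a
    // matching ggml_free(ctx) before every return statement.
    // After: the smart pointer owns the context and frees it on scope exit.
    ggml_context_ptr ctx_ptr { ggml_init(params) };
    GGML_ASSERT(ctx_ptr != nullptr);
    ggml_context * ctx = ctx_ptr.get();  // raw pointer for the existing ggml C API

    ggml_tensor * tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
    if (tensor == nullptr) {
        return false;   // context released here by ggml_context_ptr's destructor
    }
    return true;        // ...and here, with no explicit cleanup
}

Dropping the per-path ggml_free calls accounts for the 22 deleted lines against 23 added ones, and it removes the risk of leaking the context on any early return that forgets the cleanup.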