rgerganov committed
Commit 24b9742 · 1 Parent(s): 0c950d5

rpc : use ggml_context_ptr (llama/12938)

Files changed (1)
  1. ggml/src/ggml-rpc/ggml-rpc.cpp +23 -22
ggml/src/ggml-rpc/ggml-rpc.cpp CHANGED
@@ -1,6 +1,7 @@
 #include "ggml-rpc.h"
 #include "ggml-impl.h"
 #include "ggml-backend-impl.h"
+#include "ggml-cpp.h"
 
 #include <cinttypes>
 #include <string>
@@ -853,12 +854,13 @@ bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_
         /*.no_alloc =*/ true,
     };
 
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
 
     if (tensor == nullptr) {
         GGML_LOG_ERROR("Null tensor pointer passed to server get_alloc_size function.\n");
-        ggml_free(ctx);
         return false;
     }
 
@@ -871,7 +873,6 @@ bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_
 
     response.alloc_size = ggml_backend_buft_get_alloc_size(buft,tensor);
 
-    ggml_free(ctx);
     return true;
 }
 
@@ -985,11 +986,12 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
         /*.mem_buffer =*/ NULL,
         /*.no_alloc =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor);
     if (tensor == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
-        ggml_free(ctx);
         return false;
     }
     GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu\n", __func__, (void*)tensor->buffer, tensor->data, offset, size);
@@ -1016,7 +1018,6 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
         printf("[%s] saved to '%s'\n", __func__, cache_file.c_str());
     }
     ggml_backend_tensor_set(tensor, data, offset, size);
-    ggml_free(ctx);
     return true;
 }
 
@@ -1060,11 +1061,12 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
         /*.mem_buffer =*/ NULL,
         /*.no_alloc =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor);
     if (tensor == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
-        ggml_free(ctx);
         return false;
     }
     GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu, hash: %" PRIx64 "\n", __func__, (void*)tensor->buffer, tensor->data, offset, size, *hash);
@@ -1080,7 +1082,6 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
     }
     ggml_backend_tensor_set(tensor, cached_file.data(), offset, size);
     response.result = 1;
-    ggml_free(ctx);
     return true;
 }
 
@@ -1090,11 +1091,12 @@ bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) {
         /*.mem_buffer =*/ NULL,
         /*.no_alloc =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
     if (tensor == nullptr) {
         GGML_LOG_ERROR("Null tensor pointer passed to server init_tensor function.\n");
-        ggml_free(ctx);
         return false;
     }
 
@@ -1110,11 +1112,9 @@ bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) {
         // This pointer can either be passed around client/server, or probably better stored server-side and kept track of.
         // Currently unimplemented.
         GGML_LOG_ERROR("tensor->extra populated by the backend, this is currently unsupported.\n");
-        ggml_free(ctx);
         return false;
     }
 
-    ggml_free(ctx);
     return true;
 }
 
@@ -1124,11 +1124,12 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<
         /*.mem_buffer =*/ NULL,
         /*.no_alloc =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
     if (tensor == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
-        ggml_free(ctx);
         return false;
     }
     GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %" PRIu64 "\n", __func__, (void*)tensor->buffer, tensor->data, request.offset, request.size);
@@ -1147,7 +1148,6 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<
 
     response.resize(request.size, 0);
     ggml_backend_tensor_get(tensor, response.data(), request.offset, request.size);
-    ggml_free(ctx);
     return true;
 }
 
@@ -1157,12 +1157,14 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
         /*.mem_buffer =*/ NULL,
         /*.no_alloc =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
+
     ggml_tensor * src = deserialize_tensor(ctx, &request.src);
     ggml_tensor * dst = deserialize_tensor(ctx, &request.dst);
     if (src == nullptr || dst == nullptr) {
         GGML_LOG_ERROR("[%s] error deserializing tensors\n", __func__);
-        ggml_free(ctx);
         return false;
     }
 
@@ -1180,7 +1182,6 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
             dst_data + src_size,
            dst_base,
            dst_base + dst_buf_sz);
-        ggml_free(ctx);
        return false;
    }
 
@@ -1188,7 +1189,6 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
         __func__, (void*) src->buffer, (void*) dst->buffer);
 
    response.result = ggml_backend_buffer_copy_tensor(src, dst);
-    ggml_free(ctx);
    return true;
}
 
@@ -1242,7 +1242,9 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph
         /*.mem_buffer =*/ NULL,
         /*.no_alloc =*/ true,
     };
-    struct ggml_context * ctx = ggml_init(params);
+    ggml_context_ptr ctx_ptr { ggml_init(params) };
+    GGML_ASSERT(ctx_ptr != nullptr);
+    ggml_context * ctx = ctx_ptr.get();
     struct ggml_cgraph * graph = ggml_new_graph_custom(ctx, n_nodes, false);
     graph->n_nodes = n_nodes;
     std::unordered_map<uint64_t, const rpc_tensor*> tensor_ptrs;
@@ -1257,7 +1259,6 @@ bool rpc_server::graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph
     }
     ggml_status status = ggml_backend_graph_compute(backend, graph);
     response.result = status;
-    ggml_free(ctx);
     return true;
 }
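Every handler touched by this diff previously paired a manual ggml_init(params) with a ggml_free(ctx) on each return path, so any early return that missed the free leaked the context. The change swaps those pairs for ggml_context_ptr, which releases the context automatically when it goes out of scope, and adds a GGML_ASSERT so a failed ggml_init aborts instead of passing a null context to deserialize_tensor (the old code did not check for that). The header ggml-cpp.h is not shown in this diff; the following is only a minimal sketch, assuming ggml_context_ptr is a std::unique_ptr with a deleter that calls ggml_free, and example_handler is a made-up function illustrating the usage pattern, not code from the repository:

#include <memory>
#include "ggml.h"

// Hypothetical reconstruction of the RAII helper expected from ggml-cpp.h:
// a unique_ptr whose deleter frees the ggml context.
struct ggml_context_deleter {
    void operator()(ggml_context * ctx) const { ggml_free(ctx); }
};
typedef std::unique_ptr<ggml_context, ggml_context_deleter> ggml_context_ptr;

// Usage mirroring the handlers above: the context is freed on every return
// path, including early returns, when ctx_ptr is destroyed.
static bool example_handler(ggml_init_params params) {
    ggml_context_ptr ctx_ptr { ggml_init(params) };
    GGML_ASSERT(ctx_ptr != nullptr);
    ggml_context * ctx = ctx_ptr.get();
    if (ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16) == nullptr) {
        return false; // early return: no explicit ggml_free needed
    }
    return true;      // normal return: ctx_ptr destructor calls ggml_free
}

The raw ggml_context pointer obtained via ctx_ptr.get() is still handed to deserialize_tensor and the graph helpers exactly as before; only the ownership and cleanup move into the smart pointer.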