32 #include "../internal.h"
37 #include <tensorflow/c/c_api.h>
56 #define OFFSET(x) offsetof(TFContext, x)
57 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM
66 const char **output_names, uint32_t nb_output,
AVFrame *out_frame,
77 unsigned char *graph_data =
NULL;
79 long size, bytes_read;
94 if (bytes_read !=
size){
99 graph_buf = TF_NewBuffer();
100 graph_buf->data = graph_data;
101 graph_buf->length =
size;
115 size =
sizeof(float);
125 return TF_AllocateTensor(dt, input_dims, 4,
126 input_dims[1] * input_dims[2] * input_dims[3] *
size);
137 tf_output.oper = TF_GraphOperationByName(tf_model->
graph, input_name);
138 if (!tf_output.oper) {
144 input->
dt = TF_OperationOutputType(tf_output);
146 status = TF_NewStatus();
147 TF_GraphGetTensorShape(tf_model->
graph, tf_output, dims, 4, status);
148 if (TF_GetCode(status) != TF_OK){
149 TF_DeleteStatus(status);
153 TF_DeleteStatus(status);
158 input->
width = dims[2];
165 const char *output_name,
int *output_width,
int *output_height)
185 in_frame->
width = input_width;
186 in_frame->
height = input_height;
189 *output_width = out_frame->
width;
190 *output_height = out_frame->
height;
200 TF_Buffer *graph_def;
201 TF_ImportGraphDefOptions *graph_opts;
202 TF_SessionOptions *sess_opts;
203 const TF_Operation *init_op;
205 int sess_config_length = 0;
234 if (sess_config_length % 2 != 0) {
236 "please re-generate the config.\n",
241 sess_config_length -= 2;
242 sess_config_length /= 2;
244 sess_config =
av_malloc(sess_config_length);
250 for (
int i = 0;
i < sess_config_length;
i++) {
251 int index = 2 + (sess_config_length - 1 -
i) * 2;
254 sess_config[
i] = strtol(
tmp,
NULL, 16);
264 tf_model->
graph = TF_NewGraph();
265 tf_model->
status = TF_NewStatus();
266 graph_opts = TF_NewImportGraphDefOptions();
267 TF_GraphImportGraphDef(tf_model->
graph, graph_def, graph_opts, tf_model->
status);
268 TF_DeleteImportGraphDefOptions(graph_opts);
269 TF_DeleteBuffer(graph_def);
270 if (TF_GetCode(tf_model->
status) != TF_OK){
271 TF_DeleteGraph(tf_model->
graph);
272 TF_DeleteStatus(tf_model->
status);
278 init_op = TF_GraphOperationByName(tf_model->
graph,
"init");
279 sess_opts = TF_NewSessionOptions();
282 TF_SetConfig(sess_opts, sess_config, sess_config_length,tf_model->
status);
284 if (TF_GetCode(tf_model->
status) != TF_OK) {
292 TF_DeleteSessionOptions(sess_opts);
293 if (TF_GetCode(tf_model->
status) != TF_OK)
305 if (TF_GetCode(tf_model->
status) != TF_OK)
315 #define NAME_BUFFER_SIZE 256
322 TF_OperationDescription *op_desc;
324 int64_t strides[] = {1, 1, 1, 1};
335 op_desc = TF_NewOperation(tf_model->
graph,
"Const", name_buffer);
336 TF_SetAttrType(op_desc,
"dtype", TF_FLOAT);
342 tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len,
size *
sizeof(
float));
343 memcpy(TF_TensorData(tensor), params->
kernel,
size *
sizeof(
float));
344 TF_SetAttrTensor(op_desc,
"value", tensor, tf_model->
status);
345 if (TF_GetCode(tf_model->
status) != TF_OK){
349 op = TF_FinishOperation(op_desc, tf_model->
status);
350 if (TF_GetCode(tf_model->
status) != TF_OK){
356 op_desc = TF_NewOperation(tf_model->
graph,
"Transpose", name_buffer);
358 TF_AddInput(op_desc, input);
359 input.oper = transpose_op;
360 TF_AddInput(op_desc, input);
361 TF_SetAttrType(op_desc,
"T", TF_FLOAT);
362 TF_SetAttrType(op_desc,
"Tperm", TF_INT32);
363 op = TF_FinishOperation(op_desc, tf_model->
status);
364 if (TF_GetCode(tf_model->
status) != TF_OK){
370 op_desc = TF_NewOperation(tf_model->
graph,
"Conv2D", name_buffer);
371 input.oper = *cur_op;
372 TF_AddInput(op_desc, input);
374 TF_AddInput(op_desc, input);
375 TF_SetAttrType(op_desc,
"T", TF_FLOAT);
376 TF_SetAttrIntList(op_desc,
"strides", strides, 4);
377 TF_SetAttrString(op_desc,
"padding",
"VALID", 5);
378 *cur_op = TF_FinishOperation(op_desc, tf_model->
status);
379 if (TF_GetCode(tf_model->
status) != TF_OK){
385 op_desc = TF_NewOperation(tf_model->
graph,
"Const", name_buffer);
386 TF_SetAttrType(op_desc,
"dtype", TF_FLOAT);
389 tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->
output_num *
sizeof(
float));
390 memcpy(TF_TensorData(tensor), params->
biases, params->
output_num *
sizeof(
float));
391 TF_SetAttrTensor(op_desc,
"value", tensor, tf_model->
status);
392 if (TF_GetCode(tf_model->
status) != TF_OK){
396 op = TF_FinishOperation(op_desc, tf_model->
status);
397 if (TF_GetCode(tf_model->
status) != TF_OK){
403 op_desc = TF_NewOperation(tf_model->
graph,
"BiasAdd", name_buffer);
404 input.oper = *cur_op;
405 TF_AddInput(op_desc, input);
407 TF_AddInput(op_desc, input);
408 TF_SetAttrType(op_desc,
"T", TF_FLOAT);
409 *cur_op = TF_FinishOperation(op_desc, tf_model->
status);
410 if (TF_GetCode(tf_model->
status) != TF_OK){
418 op_desc = TF_NewOperation(tf_model->
graph,
"Relu", name_buffer);
421 op_desc = TF_NewOperation(tf_model->
graph,
"Tanh", name_buffer);
424 op_desc = TF_NewOperation(tf_model->
graph,
"Sigmoid", name_buffer);
430 input.oper = *cur_op;
431 TF_AddInput(op_desc, input);
432 TF_SetAttrType(op_desc,
"T", TF_FLOAT);
433 *cur_op = TF_FinishOperation(op_desc, tf_model->
status);
434 if (TF_GetCode(tf_model->
status) != TF_OK){
446 TF_OperationDescription *op_desc;
451 op_desc = TF_NewOperation(tf_model->
graph,
"DepthToSpace", name_buffer);
452 input.oper = *cur_op;
454 TF_AddInput(op_desc, input);
455 TF_SetAttrType(op_desc,
"T", TF_FLOAT);
456 TF_SetAttrInt(op_desc,
"block_size", params->
block_size);
457 *cur_op = TF_FinishOperation(op_desc, tf_model->
status);
458 if (TF_GetCode(tf_model->
status) != TF_OK){
472 TF_OperationDescription *op_desc;
480 op_desc = TF_NewOperation(tf_model->
graph,
"Const", name_buffer);
481 TF_SetAttrType(op_desc,
"dtype", TF_INT32);
482 tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 *
sizeof(
int32_t));
483 pads = (
int32_t *)TF_TensorData(tensor);
492 TF_SetAttrTensor(op_desc,
"value", tensor, tf_model->
status);
493 if (TF_GetCode(tf_model->
status) != TF_OK){
497 op = TF_FinishOperation(op_desc, tf_model->
status);
498 if (TF_GetCode(tf_model->
status) != TF_OK){
503 op_desc = TF_NewOperation(tf_model->
graph,
"MirrorPad",
"mirror_pad");
504 input.oper = *cur_op;
506 TF_AddInput(op_desc, input);
508 TF_AddInput(op_desc, input);
509 TF_SetAttrType(op_desc,
"T", TF_FLOAT);
510 TF_SetAttrType(op_desc,
"Tpaddings", TF_INT32);
511 TF_SetAttrString(op_desc,
"mode",
"SYMMETRIC", 9);
512 *cur_op = TF_FinishOperation(op_desc, tf_model->
status);
513 if (TF_GetCode(tf_model->
status) != TF_OK){
527 TF_OperationDescription *op_desc;
534 op_desc = TF_NewOperation(tf_model->
graph,
"Const", name_buffer);
535 TF_SetAttrType(op_desc,
"dtype", TF_FLOAT);
536 tensor = TF_AllocateTensor(TF_FLOAT,
NULL, 0, TF_DataTypeSize(TF_FLOAT));
537 y = (
float *)TF_TensorData(tensor);
539 TF_SetAttrTensor(op_desc,
"value", tensor, tf_model->
status);
540 if (TF_GetCode(tf_model->
status) != TF_OK){
544 op = TF_FinishOperation(op_desc, tf_model->
status);
545 if (TF_GetCode(tf_model->
status) != TF_OK){
551 op_desc = TF_NewOperation(tf_model->
graph,
"Maximum", name_buffer);
552 input.oper = *cur_op;
554 TF_AddInput(op_desc, input);
556 TF_AddInput(op_desc, input);
557 TF_SetAttrType(op_desc,
"T", TF_FLOAT);
558 *cur_op = TF_FinishOperation(op_desc, tf_model->
status);
559 if (TF_GetCode(tf_model->
status) != TF_OK){
571 TF_OperationDescription *op_desc;
573 TF_Operation *transpose_op;
577 int64_t transpose_perm_shape[] = {4};
578 int64_t input_shape[] = {1, -1, -1, -1};
589 native_model = model->
model;
590 tf_model->
graph = TF_NewGraph();
591 tf_model->
status = TF_NewStatus();
593 #define CLEANUP_ON_ERROR(tf_model) \
595 TF_DeleteGraph(tf_model->graph); \
596 TF_DeleteStatus(tf_model->status); \
597 av_log(ctx, AV_LOG_ERROR, "Failed to set value or add operator to layer\n"); \
601 op_desc = TF_NewOperation(tf_model->
graph,
"Placeholder",
"x");
602 TF_SetAttrType(op_desc,
"dtype", TF_FLOAT);
603 TF_SetAttrShape(op_desc,
"shape", input_shape, 4);
604 op = TF_FinishOperation(op_desc, tf_model->
status);
605 if (TF_GetCode(tf_model->
status) != TF_OK){
609 op_desc = TF_NewOperation(tf_model->
graph,
"Const",
"transpose_perm");
610 TF_SetAttrType(op_desc,
"dtype", TF_INT32);
611 tensor = TF_AllocateTensor(TF_INT32, transpose_perm_shape, 1, 4 *
sizeof(
int32_t));
617 TF_SetAttrTensor(op_desc,
"value", tensor, tf_model->
status);
618 if (TF_GetCode(tf_model->
status) != TF_OK){
621 transpose_op = TF_FinishOperation(op_desc, tf_model->
status);
623 for (layer = 0; layer < native_model->
layers_num; ++layer){
653 op_desc = TF_NewOperation(tf_model->
graph,
"Identity",
"y");
656 TF_AddInput(op_desc, input);
657 TF_FinishOperation(op_desc, tf_model->
status);
658 if (TF_GetCode(tf_model->
status) != TF_OK){
682 tf_model->
ctx.
class = &dnn_tensorflow_class;
683 tf_model->
model = model;
703 model->
model = tf_model;
714 const char **output_names, uint32_t nb_output,
AVFrame *out_frame,
717 TF_Output *tf_outputs;
721 TF_Tensor **output_tensors;
723 TF_Tensor *input_tensor;
730 tf_input.oper = TF_GraphOperationByName(tf_model->
graph, input_name);
741 input.
data = (
float *)TF_TensorData(input_tensor);
751 if (nb_output != 1) {
759 if (tf_outputs ==
NULL) {
765 if (!output_tensors) {
771 for (
int i = 0;
i < nb_output; ++
i) {
772 tf_outputs[
i].oper = TF_GraphOperationByName(tf_model->
graph, output_names[
i]);
773 if (!tf_outputs[
i].oper) {
779 tf_outputs[
i].index = 0;
783 &tf_input, &input_tensor, 1,
784 tf_outputs, output_tensors, nb_output,
786 if (TF_GetCode(tf_model->
status) != TF_OK) {
793 for (uint32_t
i = 0;
i < nb_output; ++
i) {
794 output.
height = TF_Dim(output_tensors[
i], 1);
795 output.
width = TF_Dim(output_tensors[
i], 2);
796 output.
channels = TF_Dim(output_tensors[
i], 3);
797 output.
data = TF_TensorData(output_tensors[
i]);
798 output.
dt = TF_TensorType(output_tensors[
i]);
812 for (uint32_t
i = 0;
i < nb_output; ++
i) {
813 if (output_tensors[
i]) {
814 TF_DeleteTensor(output_tensors[
i]);
817 TF_DeleteTensor(input_tensor);
824 const char **output_names, uint32_t nb_output,
AVFrame *out_frame)
839 return execute_model_tf(model, input_name, in_frame, output_names, nb_output, out_frame, 1);
847 tf_model = (*model)->
model;
848 if (tf_model->
graph){
849 TF_DeleteGraph(tf_model->
graph);
856 TF_DeleteStatus(tf_model->
status);
Simple assert() macros that are a bit more flexible than ISO C assert().
#define av_assert0(cond)
assert() equivalent, that is always enabled.
#define AVIO_FLAG_READ
read-only
int avio_open(AVIOContext **s, const char *url, int flags)
Create and initialize an AVIOContext for accessing the resource indicated by url.
int64_t avio_size(AVIOContext *s)
Get the filesize.
int avio_read(AVIOContext *s, unsigned char *buf, int size)
Read size bytes from AVIOContext into buf.
int avio_closep(AVIOContext **s)
Close the resource accessed by the AVIOContext *s, free it and set the pointer pointing to it to NULL. This function can only be used if s was opened by avio_open().
void ff_dnn_free_model_native(DNNModel **model)
DNNModel * ff_dnn_load_model_native(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
DNN inference functions interface for native backend.
DNN inference functions interface for native backend.
DNN inference functions interface for native backend.
layer pad (equivalent to tf.pad) for native backend.
static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_op, TF_Operation **cur_op, ConvolutionalParams *params, const int layer)
static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input_name)
static DNNReturnType add_depth_to_space_layer(TFModel *tf_model, TF_Operation **cur_op, DepthToSpaceParams *params, const int layer)
static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame, const char **output_names, uint32_t nb_output, AVFrame *out_frame, int do_ioproc)
static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename)
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame, const char **output_names, uint32_t nb_output, AVFrame *out_frame)
static TF_Tensor * allocate_input_tensor(const DNNData *input)
DNNModel * ff_dnn_load_model_tf(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
#define CLEANUP_ON_ERROR(tf_model)
static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename)
AVFILTER_DEFINE_CLASS(dnn_tensorflow)
static DNNReturnType get_output_tf(void *model, const char *input_name, int input_width, int input_height, const char *output_name, int *output_width, int *output_height)
static const AVOption dnn_tensorflow_options[]
static DNNReturnType add_pad_layer(TFModel *tf_model, TF_Operation **cur_op, LayerPadParams *params, const int layer)
static TF_Buffer * read_graph(const char *model_filename)
void ff_dnn_free_model_tf(DNNModel **model)
static DNNReturnType add_maximum_layer(TFModel *tf_model, TF_Operation **cur_op, DnnLayerMaximumParams *params, const int layer)
static void free_buffer(void *data, size_t length)
DNN inference functions interface for TensorFlow backend.
DNNReturnType ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, DNNFunctionType func_type, void *log_ctx)
DNN input and output processing between AVFrame and DNNData.
const OptionDef options[]
int av_opt_set_from_string(void *ctx, const char *opts, const char *const *shorthand, const char *key_val_sep, const char *pairs_sep)
Parse the key-value pairs list in opts.
void av_opt_set_defaults(void *s)
Set the values of all AVOption fields to their default values.
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g. extended_data.
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if available on the CPU) and zero all the bytes of the block.
void * av_mallocz_array(size_t nmemb, size_t size)
Allocate a memory block for an array with av_mallocz().
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
void avpriv_report_missing_feature(void *avc, const char *msg,...) av_printf_format(2
Log a generic warning message about a missing feature.
Describe the class of an AVClass context structure.
This structure describes decoded (raw) audio or video data.
DNNActivationFunc activation
int(* pre_proc)(AVFrame *frame_in, DNNData *model_input, AVFilterContext *filter_ctx)
int(* post_proc)(AVFrame *frame_out, DNNData *model_output, AVFilterContext *filter_ctx)
DNNReturnType(* get_output)(void *model, const char *input_name, int input_width, int input_height, const char *output_name, int *output_width, int *output_height)
DNNReturnType(* get_input)(void *model, DNNData *input, const char *input_name)
AVFilterContext * filter_ctx
DNNFunctionType func_type
union DnnLayerMaximumParams::@202 val
#define av_malloc_array(a, b)
static FilteringContext * filter_ctx
static void transpose_perm(int16_t *out, int16_t *in, int num_vect, const uint8_t line_len[2], int length_div)
Interpret the input data as in the following table: