diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 1bc5d934f5..34e756c7fd 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -2225,6 +2225,9 @@ int cr_dump_tasks(pid_t pid)
 		goto err;
 	}
 
+	if (run_plugins(DUMP_DEVICE_LATE, pid))
+		goto err;
+
 	if (parent_ie) {
 		inventory_entry__free_unpacked(parent_ie, NULL);
 		parent_ie = NULL;
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index ddca6b8ece..0b4acb99b2 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -1651,6 +1651,9 @@ static int __restore_task_with_children(void *_arg)
 	if (open_transport_socket())
 		goto err;
 
+	if (run_plugins(RESUME_DEVICES_EARLY, current->pid->real))
+		goto err;
+
 	timing_start(TIME_FORK);
 
 	if (create_children_and_session())
diff --git a/criu/files-ext.c b/criu/files-ext.c
index 95ec8e37c3..05f576e1b6 100644
--- a/criu/files-ext.c
+++ b/criu/files-ext.c
@@ -45,10 +45,11 @@ static int open_fd(struct file_desc *d, int *new_fd)
 {
 	struct ext_file_info *xfi;
 	int fd;
+	bool retry_needed = false;
 
 	xfi = container_of(d, struct ext_file_info, d);
 
-	fd = run_plugins(RESTORE_EXT_FILE, xfi->xfe->id);
+	fd = run_plugins(RESTORE_EXT_FILE, xfi->xfe->id, &retry_needed);
 	if (fd < 0) {
 		pr_err("Unable to restore %#x\n", xfi->xfe->id);
 		return -1;
@@ -57,8 +58,11 @@ static int open_fd(struct file_desc *d, int *new_fd)
 	if (restore_fown(fd, xfi->xfe->fown))
 		return -1;
 
-	*new_fd = fd;
-	return 0;
+	if (!retry_needed)
+		*new_fd = fd;
+	else
+		*new_fd = -1;
+	return retry_needed;
 }
 
 static struct file_desc_ops ext_desc_ops = {
@@ -83,14 +87,14 @@ struct collect_image_info ext_file_cinfo = {
 	.collect = collect_one_ext,
 };
 
-int dump_unsupp_fd(struct fd_parms *p, int lfd, char *more, char *info, FdinfoEntry *e)
+int dump_unsupp_fd(struct fd_parms *p, int lfd, char *more, char *info, FdinfoEntry *e, bool force)
 {
 	int ret;
 
-	ret = do_dump_gen_file(p, lfd, &ext_dump_ops, e);
+	ret = do_dump_gen_file(p, lfd, &ext_dump_ops, e, force);
 	if (ret == 0)
 		return 0;
 	if (ret == -ENOTSUP)
 		pr_err("Can't dump file %d of that type [%o] (%s %s)\n", p->fd, p->stat.st_mode, more, info);
-	return -1;
+	return ret;
 }
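The new `retry_needed` out-parameter lets a RESTORE_EXT_FILE plugin report "not yet" without failing: open_fd() above then returns 1, so the file engine calls it again on a later pass. A minimal sketch of the plugin side, where `is_dmabuf_id()` and `restore_drm_fd()` are hypothetical helpers standing in for real device logic:

```c
#include <stdbool.h>

#include "criu-plugin.h"

/* Hypothetical helpers -- not part of the CRIU plugin API. */
extern bool is_dmabuf_id(int id);
extern int restore_drm_fd(int id);

int cr_plugin_restore_ext_file(int id, bool *retry_needed)
{
	*retry_needed = false;

	if (is_dmabuf_id(id)) {
		/*
		 * The exporting DRM fd may not be restored yet: report
		 * success but ask the file engine to call back later.
		 * No fd is installed on this pass.
		 */
		*retry_needed = true;
		return 0;
	}

	return restore_drm_fd(id);
}
```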
diff --git a/criu/files.c b/criu/files.c
index 31e705bcc5..07b74a0764 100644
--- a/criu/files.c
+++ b/criu/files.c
@@ -329,7 +329,9 @@ uint32_t make_gen_id(uint32_t st_dev, uint32_t st_ino, uint64_t pos)
 	return st_dev ^ st_ino ^ pos_hi ^ pos_low;
 }
 
-int do_dump_gen_file(struct fd_parms *p, int lfd, const struct fdtype_ops *ops, FdinfoEntry *e)
+/* Use "force" to override the fd-id cache and dump the file again */
+int do_dump_gen_file(struct fd_parms *p, int lfd,
+		     const struct fdtype_ops *ops, FdinfoEntry *e, bool force)
 {
 	int ret = -1;
 
@@ -339,7 +341,7 @@ int do_dump_gen_file(struct fd_parms *p, int lfd, const struct fdtype_ops *ops,
 	e->flags = p->fd_flags;
 
 	ret = fd_id_generate(p->pid, e, p);
-	if (ret == 1) /* new ID generated */
+	if (ret == 1 || force) /* new ID generated, or the caller forces a re-dump */
 		ret = ops->dump(lfd, e->id, p);
 	else
 		/* Remove locks generated by the fd before going to the next */
@@ -484,19 +486,19 @@ static int dump_chrdev(struct fd_parms *p, int lfd, FdinfoEntry *e)
 		}
 
 		sprintf(more, "%d:%d", maj, minor(p->stat.st_rdev));
-		err = dump_unsupp_fd(p, lfd, "chr", more, e);
+		err = dump_unsupp_fd(p, lfd, "chr", more, e, false);
 		p->link = link_old;
 		return err;
 	}
 
-	err = do_dump_gen_file(p, lfd, ops, e);
+	err = do_dump_gen_file(p, lfd, ops, e, false);
 	p->link = link_old;
 	return err;
 }
 
 static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, struct parasite_ctl *ctl,
-			 FdinfoEntry *e, struct parasite_drain_fd *dfds)
+			 FdinfoEntry *e, struct parasite_drain_fd *dfds, bool force)
 {
 	struct fd_parms p = FD_PARMS_INIT;
 	const struct fdtype_ops *ops;
 
@@ -552,14 +554,14 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts,
 			ops = &bpfmap_dump_ops;
 #endif
 		else
-			return dump_unsupp_fd(&p, lfd, "anon", link, e);
+			return dump_unsupp_fd(&p, lfd, "anon", link, e, force);
 
-		return do_dump_gen_file(&p, lfd, ops, e);
+		return do_dump_gen_file(&p, lfd, ops, e, force);
 	}
 
 	if (p.fs_type == PID_FS_MAGIC) {
 		ops = &pidfd_dump_ops;
-		return do_dump_gen_file(&p, lfd, ops, e);
+		return do_dump_gen_file(&p, lfd, ops, e, force);
 	}
 
 	if (S_ISREG(p.stat.st_mode) || S_ISDIR(p.stat.st_mode) || S_ISLNK(p.stat.st_mode)) {
@@ -576,9 +578,9 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts,
 		else if (check_ns_proc(&link))
 			ops = &nsfile_dump_ops;
 		else
-			return dump_unsupp_fd(&p, lfd, "reg", link.name + 1, e);
+			return dump_unsupp_fd(&p, lfd, "reg", link.name + 1, e, force);
 
-		return do_dump_gen_file(&p, lfd, ops, e);
+		return do_dump_gen_file(&p, lfd, ops, e, force);
 	}
 
 	if (S_ISFIFO(p.stat.st_mode)) {
@@ -587,7 +589,7 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts,
 		else
 			ops = &fifo_dump_ops;
 
-		return do_dump_gen_file(&p, lfd, ops, e);
+		return do_dump_gen_file(&p, lfd, ops, e, force);
 	}
 
 	/*
@@ -598,7 +600,7 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts,
 	if (fill_fdlink(lfd, &p, &link))
 		memzero(&link, sizeof(link));
 
-	return dump_unsupp_fd(&p, lfd, "unknown", link.name + 1, e);
+	return dump_unsupp_fd(&p, lfd, "unknown", link.name + 1, e, force);
 }
 
 int dump_my_file(int lfd, u32 *id, int *type)
@@ -610,7 +612,7 @@ int dump_my_file(int lfd, u32 *id, int *type)
 	me.real = getpid();
 	me.ns[0].virt = -1; /* FIXME */
 
-	if (dump_one_file(&me, lfd, lfd, &fdo, NULL, &e, NULL))
+	if (dump_one_file(&me, lfd, lfd, &fdo, NULL, &e, NULL, false))
 		return -1;
 
 	*id = e.id;
@@ -625,6 +627,8 @@ int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, s
 	struct fd_opts *opts = NULL;
 	int i, ret = -1;
 	int off, nr_fds = min((int)PARASITE_MAX_FDS, dfds->nr_fds);
+	int *retry_indices = NULL;
+	int retry_count = 0;
 
 	pr_info("\n");
 	pr_info("Dumping opened files (pid: %d)\n", item->pid->real);
@@ -642,6 +646,10 @@ int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, s
 	if (!img)
 		goto err;
 
+	retry_indices = xmalloc(dfds->nr_fds * sizeof(int)); /* room for one retry slot per fd */
+	if (!retry_indices)
+		goto err;
+
 	ret = 0; /* Don't fail if nr_fds == 0 */
 	for (off = 0; ret == 0 && off < dfds->nr_fds; off += nr_fds) {
 		if (nr_fds + off > dfds->nr_fds)
@@ -654,9 +662,31 @@ int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, s
+		retry_count = 0; /* retry slots are per batch; lfds[] is refilled each round */
 		for (i = 0; i < nr_fds; i++) {
 			FdinfoEntry e = FDINFO_ENTRY__INIT;
 
-			ret = dump_one_file(item->pid, dfds->fds[i + off], lfds[i], opts + i, ctl, &e, dfds);
+			ret = dump_one_file(item->pid, dfds->fds[i + off], lfds[i], opts + i, ctl, &e, dfds, false);
+			if (ret == -EAGAIN) {
+				retry_indices[retry_count++] = i;
+				ret = 0; /* reset ret so the loop keeps going */
+				continue;
+			} else if (ret)
+				break;
+
+			ret = pb_write_one(img, &e, PB_FDINFO);
 			if (ret)
 				break;
+		}
+		/*
+		 * Some handles, e.g. dmabuf handles, should be dealt with
+		 * after the drm handles they refer to. Do that here, if
+		 * necessary.
+		 */
+		for (i = 0; i < retry_count; i++) {
+			int idx = retry_indices[i];
+			FdinfoEntry e = FDINFO_ENTRY__INIT;
+
+			ret = dump_one_file(item->pid, dfds->fds[idx + off], lfds[idx],
+					    opts + idx, ctl, &e, dfds, true);
+			if (ret) {
+				pr_err("Retry failed for fd index %d\n", idx);
+				break;
+			}
 
 			ret = pb_write_one(img, &e, PB_FDINFO);
 			if (ret)
 				break;
@@ -673,6 +703,8 @@ int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, s
 	close_image(img);
 	xfree(opts);
 	xfree(lfds);
+	xfree(retry_indices);
+
 	return ret;
 }
@@ -836,6 +868,9 @@ struct fdinfo_list_entry *collect_fd_to(int pid, FdinfoEntry *e, struct rst_info
 {
 	struct fdinfo_list_entry *new_le;
 
+	if (fdesc->ops->type == FD_TYPES__EXT)
+		run_plugins(COLLECT_FILE, pid, fdesc->id);
+
 	new_le = alloc_fle(pid, e);
 	if (new_le) {
 		new_le->fake = (!!fake);
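The retry pass above pairs with a dump hook that answers -EAGAIN: dump_unsupp_fd() now propagates the raw error, the fd gets queued, and dump_one_file() is re-run with `force` set so the cached fd-id does not suppress the second dump. A sketch of what the plugin side of that contract could look like, with `is_dmabuf_fd()` and friends as hypothetical stand-ins for real device logic:

```c
#include <errno.h>
#include <stdbool.h>

#include "criu-plugin.h"

/* Hypothetical helpers -- stand-ins for device-specific logic. */
extern bool is_dmabuf_fd(int fd);
extern bool drm_state_dumped(void);
extern int dump_dmabuf_fd(int fd, int id);
extern int dump_drm_fd(int fd, int id);

int cr_plugin_dump_ext_file(int fd, int id)
{
	if (is_dmabuf_fd(fd)) {
		/*
		 * Defer dmabufs until their DRM exporters have been
		 * dumped; -EAGAIN bubbles up through dump_unsupp_fd()
		 * and queues this fd for the retry loop.
		 */
		if (!drm_state_dumped())
			return -EAGAIN;
		return dump_dmabuf_fd(fd, id);
	}

	return dump_drm_fd(fd, id);
}
```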
diff --git a/criu/include/criu-plugin.h b/criu/include/criu-plugin.h
index 392ea9f534..b2a3ffce97 100644
--- a/criu/include/criu-plugin.h
+++ b/criu/include/criu-plugin.h
@@ -60,6 +60,12 @@ enum {
 
 	CR_PLUGIN_HOOK__CHECKPOINT_DEVICES = 11,
 
+	CR_PLUGIN_HOOK__RESUME_DEVICES_EARLY = 12,
+
+	CR_PLUGIN_HOOK__COLLECT_FILE = 13,
+
+	CR_PLUGIN_HOOK__DUMP_DEVICE_LATE = 14,
+
 	CR_PLUGIN_HOOK__MAX
 };
 
@@ -68,7 +74,7 @@ enum {
 DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_UNIX_SK, int fd, int id);
 DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_UNIX_SK, int id);
 DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_FILE, int fd, int id);
-DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id);
+DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id, bool *retry_needed);
 DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_MOUNT, char *mountpoint, int id);
 DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_MOUNT, int id, char *mountpoint, char *old_root, int *is_file);
 DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_LINK, int index, int type, char *kind);
@@ -78,6 +84,9 @@ DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__UPDATE_VMA_MAP, const char *path, const
 DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, int pid);
 DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__PAUSE_DEVICES, int pid);
 DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__CHECKPOINT_DEVICES, int pid);
+DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESUME_DEVICES_EARLY, int pid);
+DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__COLLECT_FILE, int pid, int fd);
+DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_DEVICE_LATE, int id);
 
 enum {
 	CR_PLUGIN_STAGE__DUMP,
diff --git a/criu/include/files.h b/criu/include/files.h
index 31ebb0ca0f..48c74dcba0 100644
--- a/criu/include/files.h
+++ b/criu/include/files.h
@@ -138,7 +138,8 @@ struct fdtype_ops {
 struct cr_img;
 
 extern int dump_my_file(int lfd, u32 *, int *type);
-extern int do_dump_gen_file(struct fd_parms *p, int lfd, const struct fdtype_ops *ops, FdinfoEntry *e);
+extern int do_dump_gen_file(struct fd_parms *p, int lfd,
+			    const struct fdtype_ops *ops, FdinfoEntry *e, bool force);
 struct parasite_drain_fd;
 int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, struct parasite_drain_fd *dfds);
 int predump_task_files(int pid);
@@ -177,7 +178,8 @@ extern int close_old_fds(void);
 extern int shared_fdt_prepare(struct pstree_item *item);
 
 extern struct collect_image_info ext_file_cinfo;
-extern int dump_unsupp_fd(struct fd_parms *p, int lfd, char *more, char *info, FdinfoEntry *);
+extern int dump_unsupp_fd(struct fd_parms *p, int lfd, char *more,
+			  char *info, FdinfoEntry *, bool force);
 
 extern int inherit_fd_parse(char *optarg);
 extern int inherit_fd_add(int fd, char *key);
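The three new hooks resolve to the plugin symbols registered in criu/plugin.c below. A skeleton plugin might stub them as follows; the bodies are placeholders, not the amdgpu plugin's actual implementation:

```c
#include "criu-plugin.h"

/* Dump side: invoked once, late in cr_dump_tasks(), with the root task pid. */
int cr_plugin_dump_device_late(int pid)
{
	return 0; /* e.g. write out deferred device state */
}

/* Restore side: invoked in each restored task before its children are forked. */
int cr_plugin_resume_devices_early(int pid)
{
	return 0; /* e.g. re-open device state the descendants will need */
}

/* Restore side: invoked from collect_fd_to() for every external file. */
int cr_plugin_collect_file(int pid, int fd)
{
	return 0; /* e.g. record that "pid" references this external file id */
}
```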
diff --git a/criu/include/servicefd.h b/criu/include/servicefd.h
index 4265d94edd..f74d8ef430 100644
--- a/criu/include/servicefd.h
+++ b/criu/include/servicefd.h
@@ -47,5 +47,6 @@ extern int install_service_fd(enum sfd_type type, int fd);
 extern int close_service_fd(enum sfd_type type);
 extern void __close_service_fd(enum sfd_type type);
 extern int clone_service_fd(struct pstree_item *me);
+extern int get_unused_high_fd(void);
 
 #endif /* __CR_SERVICE_FD_H__ */
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index 6d048c3f1d..3a56ed210e 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -1920,6 +1920,10 @@ __visible long __export_restore_task(struct task_restore_args *args)
 
 		for (m = 0; m < sizeof(vma_entry->madv) * 8; m++) {
 			if (vma_entry->madv & (1ul << m)) {
+				/* Only replay madvise hints on regular mappings */
+				if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
+					continue;
+
 				ret = sys_madvise(vma_entry->start, vma_entry_len(vma_entry), m);
 				if (ret) {
 					pr_err("madvise(%" PRIx64 ", %" PRIu64 ", %ld) "
diff --git a/criu/plugin.c b/criu/plugin.c
index 65e79a0692..a0f27616c8 100644
--- a/criu/plugin.c
+++ b/criu/plugin.c
@@ -59,6 +59,9 @@ static cr_plugin_desc_t *cr_gen_plugin_desc(void *h, char *path)
 	__assign_hook(RESUME_DEVICES_LATE, "cr_plugin_resume_devices_late");
 	__assign_hook(PAUSE_DEVICES, "cr_plugin_pause_devices");
 	__assign_hook(CHECKPOINT_DEVICES, "cr_plugin_checkpoint_devices");
+	__assign_hook(RESUME_DEVICES_EARLY, "cr_plugin_resume_devices_early");
+	__assign_hook(COLLECT_FILE, "cr_plugin_collect_file");
+	__assign_hook(DUMP_DEVICE_LATE, "cr_plugin_dump_device_late");
 
 #undef __assign_hook
diff --git a/criu/servicefd.c b/criu/servicefd.c
index 06a8d3ebaf..5034a19f3c 100644
--- a/criu/servicefd.c
+++ b/criu/servicefd.c
@@ -25,6 +25,7 @@ int service_fd_rlim_cur;
 
 /* Base of current process service fds set */
 static int service_fd_base;
+static int next_high_fd;
 
 /* Id of current process in shared fdt */
 static int service_fd_id = 0;
@@ -312,5 +313,15 @@ int clone_service_fd(struct pstree_item *me)
 	service_fd_id = id;
 	ret = 0;
 
+	next_high_fd = service_fd_base + 1024;
+
 	return ret;
 }
+
+/* Hand out fds above the service fd area, one at a time */
+int get_unused_high_fd(void)
+{
+	if (next_high_fd > service_fd_rlim_cur)
+		return -1;
+	return next_high_fd++;
+}
diff --git a/criu/sockets.c b/criu/sockets.c
index f9ce999bed..752fb31a92 100644
--- a/criu/sockets.c
+++ b/criu/sockets.c
@@ -754,7 +754,7 @@ int dump_socket(struct fd_parms *p, int lfd, FdinfoEntry *e)
 		return -1;
 	}
 
-	return do_dump_gen_file(p, lfd, ops, e);
+	return do_dump_gen_file(p, lfd, ops, e, false);
 }
 
 static int inet_receive_one(struct nlmsghdr *h, struct ns_id *ns, void *arg)
diff --git a/plugins/amdgpu/Makefile b/plugins/amdgpu/Makefile
index a20d1d1639..daee5acf6a 100644
--- a/plugins/amdgpu/Makefile
+++ b/plugins/amdgpu/Makefile
@@ -27,7 +27,7 @@ endif
 
 criu-amdgpu.pb-c.c: criu-amdgpu.proto
 	protoc-c --proto_path=. --c_out=.
criu-amdgpu.proto -amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_drm.c amdgpu_plugin_topology.c amdgpu_plugin_util.c criu-amdgpu.pb-c.c +amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_drm.c amdgpu_plugin_dmabuf.c amdgpu_plugin_topology.c amdgpu_plugin_util.c criu-amdgpu.pb-c.c $(CC) $(PLUGIN_CFLAGS) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) $(LIBDRM_INC) amdgpu_plugin_clean: diff --git a/plugins/amdgpu/amdgpu_drm.h b/plugins/amdgpu/amdgpu_drm.h new file mode 100644 index 0000000000..d0a46b27e1 --- /dev/null +++ b/plugins/amdgpu/amdgpu_drm.h @@ -0,0 +1,1640 @@ +/* amdgpu_drm.h -- Public header for the amdgpu driver -*- linux-c -*- + * + * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Fremont, California. + * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Kevin E. 
Martin + * Gareth Hughes + * Keith Whitwell + */ + +#ifndef __AMDGPU_DRM_H__ +#define __AMDGPU_DRM_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_AMDGPU_GEM_CREATE 0x00 +#define DRM_AMDGPU_GEM_MMAP 0x01 +#define DRM_AMDGPU_CTX 0x02 +#define DRM_AMDGPU_BO_LIST 0x03 +#define DRM_AMDGPU_CS 0x04 +#define DRM_AMDGPU_INFO 0x05 +#define DRM_AMDGPU_GEM_METADATA 0x06 +#define DRM_AMDGPU_GEM_WAIT_IDLE 0x07 +#define DRM_AMDGPU_GEM_VA 0x08 +#define DRM_AMDGPU_WAIT_CS 0x09 +#define DRM_AMDGPU_GEM_OP 0x10 +#define DRM_AMDGPU_GEM_USERPTR 0x11 +#define DRM_AMDGPU_WAIT_FENCES 0x12 +#define DRM_AMDGPU_VM 0x13 +#define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 +#define DRM_AMDGPU_SCHED 0x15 +#define DRM_AMDGPU_USERQ 0x16 +#define DRM_AMDGPU_USERQ_SIGNAL 0x17 +#define DRM_AMDGPU_USERQ_WAIT 0x18 +#define DRM_AMDGPU_CRIU_OP 0x19 + +#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) +#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) +#define DRM_IOCTL_AMDGPU_CTX DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CTX, union drm_amdgpu_ctx) +#define DRM_IOCTL_AMDGPU_BO_LIST DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_BO_LIST, union drm_amdgpu_bo_list) +#define DRM_IOCTL_AMDGPU_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CS, union drm_amdgpu_cs) +#define DRM_IOCTL_AMDGPU_INFO DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_INFO, struct drm_amdgpu_info) +#define DRM_IOCTL_AMDGPU_GEM_METADATA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, struct drm_amdgpu_gem_metadata) +#define DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_WAIT_IDLE, union drm_amdgpu_gem_wait_idle) +#define DRM_IOCTL_AMDGPU_GEM_VA DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_VA, struct drm_amdgpu_gem_va) +#define DRM_IOCTL_AMDGPU_WAIT_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs) +#define DRM_IOCTL_AMDGPU_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op) +#define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) +#define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) +#define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) +#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) +#define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq) +#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal) +#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait) +#define DRM_IOCTL_AMDGPU_CRIU_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CRIU_OP, struct drm_amdgpu_criu_args) + +/** + * DOC: memory domains + * + * %AMDGPU_GEM_DOMAIN_CPU System memory that is not GPU accessible. + * Memory in this pool could be swapped out to disk if there is pressure. + * + * %AMDGPU_GEM_DOMAIN_GTT GPU accessible system memory, mapped into the + * GPU's virtual address space via gart. Gart memory linearizes non-contiguous + * pages of system memory, allows GPU access system memory in a linearized + * fashion. + * + * %AMDGPU_GEM_DOMAIN_VRAM Local video memory. 
For APUs, it is memory + * carved out by the BIOS. + * + * %AMDGPU_GEM_DOMAIN_GDS Global on-chip data storage used to share data + * across shader threads. + * + * %AMDGPU_GEM_DOMAIN_GWS Global wave sync, used to synchronize the + * execution of all the waves on a device. + * + * %AMDGPU_GEM_DOMAIN_OA Ordered append, used by 3D or Compute engines + * for appending data. + * + * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for + * signalling user mode queues. + */ +#define AMDGPU_GEM_DOMAIN_CPU 0x1 +#define AMDGPU_GEM_DOMAIN_GTT 0x2 +#define AMDGPU_GEM_DOMAIN_VRAM 0x4 +#define AMDGPU_GEM_DOMAIN_GDS 0x8 +#define AMDGPU_GEM_DOMAIN_GWS 0x10 +#define AMDGPU_GEM_DOMAIN_OA 0x20 +#define AMDGPU_GEM_DOMAIN_DOORBELL 0x40 +#define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ + AMDGPU_GEM_DOMAIN_GTT | \ + AMDGPU_GEM_DOMAIN_VRAM | \ + AMDGPU_GEM_DOMAIN_GDS | \ + AMDGPU_GEM_DOMAIN_GWS | \ + AMDGPU_GEM_DOMAIN_OA | \ + AMDGPU_GEM_DOMAIN_DOORBELL) + +/* Flag that CPU access will be required for the case of VRAM domain */ +#define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) +/* Flag that CPU access will not work, this VRAM domain is invisible */ +#define AMDGPU_GEM_CREATE_NO_CPU_ACCESS (1 << 1) +/* Flag that USWC attributes should be used for GTT */ +#define AMDGPU_GEM_CREATE_CPU_GTT_USWC (1 << 2) +/* Flag that the memory should be in VRAM and cleared */ +#define AMDGPU_GEM_CREATE_VRAM_CLEARED (1 << 3) +/* Flag that allocating the BO should use linear VRAM */ +#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) +/* Flag that BO is always valid in this VM */ +#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) +/* Flag that BO sharing will be explicitly synchronized */ +#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC (1 << 7) +/* Flag that indicates allocating MQD gart on GFX9, where the mtype + * for the second page onward should be set to NC. It should never + * be used by user space applications. + */ +#define AMDGPU_GEM_CREATE_CP_MQD_GFX9 (1 << 8) +/* Flag that BO may contain sensitive data that must be wiped before + * releasing the memory + */ +#define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE (1 << 9) +/* Flag that BO will be encrypted and that the TMZ bit should be + * set in the PTEs when mapping this buffer via GPUVM or + * accessing it with various hw blocks + */ +#define AMDGPU_GEM_CREATE_ENCRYPTED (1 << 10) +/* Flag that BO will be used only in preemptible context, which does + * not require GTT memory accounting + */ +#define AMDGPU_GEM_CREATE_PREEMPTIBLE (1 << 11) +/* Flag that BO can be discarded under memory pressure without keeping the + * content. + */ +#define AMDGPU_GEM_CREATE_DISCARDABLE (1 << 12) +/* Flag that BO is shared coherently between multiple devices or CPU threads. + * May depend on GPU instructions to flush caches to system scope explicitly. + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_COHERENT (1 << 13) +/* Flag that BO should not be cached by GPU. Coherent without having to flush + * GPU caches explicitly + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_UNCACHED (1 << 14) +/* Flag that BO should be coherent across devices when using device-level + * atomics. May depend on GPU instructions to flush caches to device scope + * explicitly, promoting them to system scope automatically. 
+ * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_EXT_COHERENT (1 << 15) +/* Set PTE.D and recompress during GTT->VRAM moves according to TILING flags. */ +#define AMDGPU_GEM_CREATE_GFX12_DCC (1 << 16) + +struct drm_amdgpu_gem_create_in { + /** the requested memory size */ + __u64 bo_size; + /** physical start_addr alignment in bytes for some HW requirements */ + __u64 alignment; + /** the requested memory domains */ + __u64 domains; + /** allocation flags */ + __u64 domain_flags; +}; + +struct drm_amdgpu_gem_create_out { + /** returned GEM object handle */ + __u32 handle; + __u32 _pad; +}; + +union drm_amdgpu_gem_create { + struct drm_amdgpu_gem_create_in in; + struct drm_amdgpu_gem_create_out out; +}; + +/** Opcode to create new residency list. */ +#define AMDGPU_BO_LIST_OP_CREATE 0 +/** Opcode to destroy previously created residency list */ +#define AMDGPU_BO_LIST_OP_DESTROY 1 +/** Opcode to update resource information in the list */ +#define AMDGPU_BO_LIST_OP_UPDATE 2 + +struct drm_amdgpu_bo_list_in { + /** Type of operation */ + __u32 operation; + /** Handle of list or 0 if we want to create one */ + __u32 list_handle; + /** Number of BOs in list */ + __u32 bo_number; + /** Size of each element describing BO */ + __u32 bo_info_size; + /** Pointer to array describing BOs */ + __u64 bo_info_ptr; +}; + +struct drm_amdgpu_bo_list_entry { + /** Handle of BO */ + __u32 bo_handle; + /** New (if specified) BO priority to be used during migration */ + __u32 bo_priority; +}; + +struct drm_amdgpu_bo_list_out { + /** Handle of resource list */ + __u32 list_handle; + __u32 _pad; +}; + +union drm_amdgpu_bo_list { + struct drm_amdgpu_bo_list_in in; + struct drm_amdgpu_bo_list_out out; +}; + +/* context related */ +#define AMDGPU_CTX_OP_ALLOC_CTX 1 +#define AMDGPU_CTX_OP_FREE_CTX 2 +#define AMDGPU_CTX_OP_QUERY_STATE 3 +#define AMDGPU_CTX_OP_QUERY_STATE2 4 +#define AMDGPU_CTX_OP_GET_STABLE_PSTATE 5 +#define AMDGPU_CTX_OP_SET_STABLE_PSTATE 6 + +/* GPU reset status */ +#define AMDGPU_CTX_NO_RESET 0 +/* this the context caused it */ +#define AMDGPU_CTX_GUILTY_RESET 1 +/* some other context caused it */ +#define AMDGPU_CTX_INNOCENT_RESET 2 +/* unknown cause */ +#define AMDGPU_CTX_UNKNOWN_RESET 3 + +/* indicate gpu reset occurred after ctx created */ +#define AMDGPU_CTX_QUERY2_FLAGS_RESET (1<<0) +/* indicate vram lost occurred after ctx created */ +#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1) +/* indicate some job from this context once cause gpu hang */ +#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY (1<<2) +/* indicate some errors are detected by RAS */ +#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE (1<<3) +#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE (1<<4) +/* indicate that the reset hasn't completed yet */ +#define AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS (1<<5) + +/* Context priority level */ +#define AMDGPU_CTX_PRIORITY_UNSET -2048 +#define AMDGPU_CTX_PRIORITY_VERY_LOW -1023 +#define AMDGPU_CTX_PRIORITY_LOW -512 +#define AMDGPU_CTX_PRIORITY_NORMAL 0 +/* + * When used in struct drm_amdgpu_ctx_in, a priority above NORMAL requires + * CAP_SYS_NICE or DRM_MASTER +*/ +#define AMDGPU_CTX_PRIORITY_HIGH 512 +#define AMDGPU_CTX_PRIORITY_VERY_HIGH 1023 + +/* select a stable profiling pstate for perfmon tools */ +#define AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK 0xf +#define AMDGPU_CTX_STABLE_PSTATE_NONE 0 +#define AMDGPU_CTX_STABLE_PSTATE_STANDARD 1 +#define AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK 2 +#define 
AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK 3 +#define AMDGPU_CTX_STABLE_PSTATE_PEAK 4 + +struct drm_amdgpu_ctx_in { + /** AMDGPU_CTX_OP_* */ + __u32 op; + /** Flags */ + __u32 flags; + __u32 ctx_id; + /** AMDGPU_CTX_PRIORITY_* */ + __s32 priority; +}; + +union drm_amdgpu_ctx_out { + struct { + __u32 ctx_id; + __u32 _pad; + } alloc; + + struct { + /** For future use, no flags defined so far */ + __u64 flags; + /** Number of resets caused by this context so far. */ + __u32 hangs; + /** Reset status since the last call of the ioctl. */ + __u32 reset_status; + } state; + + struct { + __u32 flags; + __u32 _pad; + } pstate; +}; + +union drm_amdgpu_ctx { + struct drm_amdgpu_ctx_in in; + union drm_amdgpu_ctx_out out; +}; + +/* user queue IOCTL operations */ +#define AMDGPU_USERQ_OP_CREATE 1 +#define AMDGPU_USERQ_OP_FREE 2 + +/* + * This structure is a container to pass input configuration + * info for all supported userqueue related operations. + * For operation AMDGPU_USERQ_OP_CREATE: user is expected + * to set all fields, excep the parameter 'queue_id'. + * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected + * to be set is 'queue_id', eveything else is ignored. + */ +struct drm_amdgpu_userq_in { + /** AMDGPU_USERQ_OP_* */ + __u32 op; + /** Queue id passed for operation USERQ_OP_FREE */ + __u32 queue_id; + /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */ + __u32 ip_type; + /** + * @doorbell_handle: the handle of doorbell GEM object + * associated with this userqueue client. + */ + __u32 doorbell_handle; + /** + * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo. + * Kernel will generate absolute doorbell offset using doorbell_handle + * and doorbell_offset in the doorbell bo. + */ + __u32 doorbell_offset; + __u32 _pad; + /** + * @queue_va: Virtual address of the GPU memory which holds the queue + * object. The queue holds the workload packets. + */ + __u64 queue_va; + /** + * @queue_size: Size of the queue in bytes, this needs to be 256-byte + * aligned. + */ + __u64 queue_size; + /** + * @rptr_va : Virtual address of the GPU memory which holds the ring RPTR. + * This object must be at least 8 byte in size and aligned to 8-byte offset. + */ + __u64 rptr_va; + /** + * @wptr_va : Virtual address of the GPU memory which holds the ring WPTR. + * This object must be at least 8 byte in size and aligned to 8-byte offset. + * + * Queue, RPTR and WPTR can come from the same object, as long as the size + * and alignment related requirements are met. + */ + __u64 wptr_va; + /** + * @mqd: MQD (memory queue descriptor) is a set of parameters which allow + * the GPU to uniquely define and identify a usermode queue. + * + * MQD data can be of different size for different GPU IP/engine and + * their respective versions/revisions, so this points to a __u64 * + * which holds IP specific MQD of this usermode queue. + */ + __u64 mqd; + /** + * @size: size of MQD data in bytes, it must match the MQD structure + * size of the respective engine/revision defined in UAPI for ex, for + * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11). + */ + __u64 mqd_size; +}; + +/* The structure to carry output of userqueue ops */ +struct drm_amdgpu_userq_out { + /** + * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique + * queue ID to represent the newly created userqueue in the system, otherwise + * it should be ignored. 
+ */ + __u32 queue_id; + __u32 _pad; +}; + +union drm_amdgpu_userq { + struct drm_amdgpu_userq_in in; + struct drm_amdgpu_userq_out out; +}; + +/* GFX V11 IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_gfx11 { + /** + * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 shadow_va; + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 csa_va; +}; + +/* GFX V11 SDMA IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_sdma_gfx11 { + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. + */ + __u64 csa_va; +}; + +/* GFX V11 Compute IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_compute_gfx11 { + /** + * @eop_va: Virtual address of the GPU memory to hold the EOP buffer. + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. + */ + __u64 eop_va; +}; + +/* userq signal/wait ioctl */ +struct drm_amdgpu_userq_signal { + /** + * @queue_id: Queue handle used by the userq fence creation function + * to retrieve the WPTR. + */ + __u32 queue_id; + __u32 pad; + /** + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to be signaled. + */ + __u64 syncobj_handles; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles. + */ + __u64 num_syncobj_handles; + /** + * @bo_read_handles: The list of BO handles that the submitted user queue job + * is using for read only. This will update BO fences in the kernel. + */ + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of BO handles that the submitted user queue job + * is using for write only. This will update BO fences in the kernel. + */ + __u64 bo_write_handles; + /** + * @num_bo_read_handles: A count that represents the number of read BO handles in + * @bo_read_handles. + */ + __u32 num_bo_read_handles; + /** + * @num_bo_write_handles: A count that represents the number of write BO handles in + * @bo_write_handles. + */ + __u32 num_bo_write_handles; +}; + +struct drm_amdgpu_userq_fence_info { + /** + * @va: A gpu address allocated for each queue which stores the + * read pointer (RPTR) value. + */ + __u64 va; + /** + * @value: A 64 bit value represents the write pointer (WPTR) of the + * queue commands which compared with the RPTR value to signal the + * fences. + */ + __u64 value; +}; + +struct drm_amdgpu_userq_wait { + /** + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 syncobj_handles; + /** + * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by + * the user queue job to get the va/value pairs at given @syncobj_timeline_points. + */ + __u64 syncobj_timeline_handles; + /** + * @syncobj_timeline_points: The list of timeline syncobj points submitted by the + * user queue job for the corresponding @syncobj_timeline_handles. + */ + __u64 syncobj_timeline_points; + /** + * @bo_read_handles: The list of read BO handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of write BO handles submitted by the user queue + * job to get the va/value pairs. 
+ */ + __u64 bo_write_handles; + /** + * @num_syncobj_timeline_handles: A count that represents the number of timeline + * syncobj handles in @syncobj_timeline_handles. + */ + __u16 num_syncobj_timeline_handles; + /** + * @num_fences: This field can be used both as input and output. As input it defines + * the maximum number of fences that can be returned and as output it will specify + * how many fences were actually returned from the ioctl. + */ + __u16 num_fences; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles. + */ + __u32 num_syncobj_handles; + /** + * @num_bo_read_handles: A count that represents the number of read BO handles in + * @bo_read_handles. + */ + __u32 num_bo_read_handles; + /** + * @num_bo_write_handles: A count that represents the number of write BO handles in + * @bo_write_handles. + */ + __u32 num_bo_write_handles; + /** + * @out_fences: The field is a return value from the ioctl containing the list of + * address/value pairs to wait for. + */ + __u64 out_fences; +}; + +/* vm ioctl */ +#define AMDGPU_VM_OP_RESERVE_VMID 1 +#define AMDGPU_VM_OP_UNRESERVE_VMID 2 + +struct drm_amdgpu_vm_in { + /** AMDGPU_VM_OP_* */ + __u32 op; + __u32 flags; +}; + +struct drm_amdgpu_vm_out { + /** For future use, no flags defined so far */ + __u64 flags; +}; + +union drm_amdgpu_vm { + struct drm_amdgpu_vm_in in; + struct drm_amdgpu_vm_out out; +}; + +/* sched ioctl */ +#define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE 1 +#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE 2 + +struct drm_amdgpu_sched_in { + /* AMDGPU_SCHED_OP_* */ + __u32 op; + __u32 fd; + /** AMDGPU_CTX_PRIORITY_* */ + __s32 priority; + __u32 ctx_id; +}; + +union drm_amdgpu_sched { + struct drm_amdgpu_sched_in in; +}; + +/* + * This is not a reliable API and you should expect it to fail for any + * number of reasons and have fallback path that do not use userptr to + * perform any operation. 
+ */ +#define AMDGPU_GEM_USERPTR_READONLY (1 << 0) +#define AMDGPU_GEM_USERPTR_ANONONLY (1 << 1) +#define AMDGPU_GEM_USERPTR_VALIDATE (1 << 2) +#define AMDGPU_GEM_USERPTR_REGISTER (1 << 3) + +struct drm_amdgpu_gem_userptr { + __u64 addr; + __u64 size; + /* AMDGPU_GEM_USERPTR_* */ + __u32 flags; + /* Resulting GEM handle */ + __u32 handle; +}; + +/* SI-CI-VI: */ +/* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */ +#define AMDGPU_TILING_ARRAY_MODE_SHIFT 0 +#define AMDGPU_TILING_ARRAY_MODE_MASK 0xf +#define AMDGPU_TILING_PIPE_CONFIG_SHIFT 4 +#define AMDGPU_TILING_PIPE_CONFIG_MASK 0x1f +#define AMDGPU_TILING_TILE_SPLIT_SHIFT 9 +#define AMDGPU_TILING_TILE_SPLIT_MASK 0x7 +#define AMDGPU_TILING_MICRO_TILE_MODE_SHIFT 12 +#define AMDGPU_TILING_MICRO_TILE_MODE_MASK 0x7 +#define AMDGPU_TILING_BANK_WIDTH_SHIFT 15 +#define AMDGPU_TILING_BANK_WIDTH_MASK 0x3 +#define AMDGPU_TILING_BANK_HEIGHT_SHIFT 17 +#define AMDGPU_TILING_BANK_HEIGHT_MASK 0x3 +#define AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT 19 +#define AMDGPU_TILING_MACRO_TILE_ASPECT_MASK 0x3 +#define AMDGPU_TILING_NUM_BANKS_SHIFT 21 +#define AMDGPU_TILING_NUM_BANKS_MASK 0x3 + +/* GFX9 - GFX11: */ +#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0 +#define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f +#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT 5 +#define AMDGPU_TILING_DCC_OFFSET_256B_MASK 0xFFFFFF +#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT 29 +#define AMDGPU_TILING_DCC_PITCH_MAX_MASK 0x3FFF +#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT 43 +#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x1 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1 +#define AMDGPU_TILING_SCANOUT_SHIFT 63 +#define AMDGPU_TILING_SCANOUT_MASK 0x1 + +/* GFX12 and later: */ +#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0 +#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7 +/* These are DCC recompression setting for memory management: */ +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3 +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */ +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5 +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */ +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8 +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */ + +/* Set/Get helpers for tiling flags. 
*/ +#define AMDGPU_TILING_SET(field, value) \ + (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT) +#define AMDGPU_TILING_GET(value, field) \ + (((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK) + +#define AMDGPU_GEM_METADATA_OP_SET_METADATA 1 +#define AMDGPU_GEM_METADATA_OP_GET_METADATA 2 + +/** The same structure is shared for input/output */ +struct drm_amdgpu_gem_metadata { + /** GEM Object handle */ + __u32 handle; + /** Do we want get or set metadata */ + __u32 op; + struct { + /** For future use, no flags defined so far */ + __u64 flags; + /** family specific tiling info */ + __u64 tiling_info; + __u32 data_size_bytes; + __u32 data[64]; + } data; +}; + +struct drm_amdgpu_gem_mmap_in { + /** the GEM object handle */ + __u32 handle; + __u32 _pad; +}; + +struct drm_amdgpu_gem_mmap_out { + /** mmap offset from the vma offset manager */ + __u64 addr_ptr; +}; + +union drm_amdgpu_gem_mmap { + struct drm_amdgpu_gem_mmap_in in; + struct drm_amdgpu_gem_mmap_out out; +}; + +struct drm_amdgpu_gem_wait_idle_in { + /** GEM object handle */ + __u32 handle; + /** For future use, no flags defined so far */ + __u32 flags; + /** Absolute timeout to wait */ + __u64 timeout; +}; + +struct drm_amdgpu_gem_wait_idle_out { + /** BO status: 0 - BO is idle, 1 - BO is busy */ + __u32 status; + /** Returned current memory domain */ + __u32 domain; +}; + +union drm_amdgpu_gem_wait_idle { + struct drm_amdgpu_gem_wait_idle_in in; + struct drm_amdgpu_gem_wait_idle_out out; +}; + +struct drm_amdgpu_wait_cs_in { + /* Command submission handle + * handle equals 0 means none to wait for + * handle equals ~0ull means wait for the latest sequence number + */ + __u64 handle; + /** Absolute timeout to wait */ + __u64 timeout; + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u32 ctx_id; +}; + +struct drm_amdgpu_wait_cs_out { + /** CS status: 0 - CS completed, 1 - CS still busy */ + __u64 status; +}; + +union drm_amdgpu_wait_cs { + struct drm_amdgpu_wait_cs_in in; + struct drm_amdgpu_wait_cs_out out; +}; + +struct drm_amdgpu_fence { + __u32 ctx_id; + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u64 seq_no; +}; + +struct drm_amdgpu_wait_fences_in { + /** This points to uint64_t * which points to fences */ + __u64 fences; + __u32 fence_count; + __u32 wait_all; + __u64 timeout_ns; +}; + +struct drm_amdgpu_wait_fences_out { + __u32 status; + __u32 first_signaled; +}; + +union drm_amdgpu_wait_fences { + struct drm_amdgpu_wait_fences_in in; + struct drm_amdgpu_wait_fences_out out; +}; + +#define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0 +#define AMDGPU_GEM_OP_SET_PLACEMENT 1 + +/* Sets or returns a value associated with a buffer. */ +struct drm_amdgpu_gem_op { + /** GEM object handle */ + __u32 handle; + /** AMDGPU_GEM_OP_* */ + __u32 op; + /** Input or return value */ + __u64 value; +}; + +#define AMDGPU_VA_OP_MAP 1 +#define AMDGPU_VA_OP_UNMAP 2 +#define AMDGPU_VA_OP_CLEAR 3 +#define AMDGPU_VA_OP_REPLACE 4 + +/* Delay the page table update till the next CS */ +#define AMDGPU_VM_DELAY_UPDATE (1 << 0) + +/* Mapping flags */ +/* readable mapping */ +#define AMDGPU_VM_PAGE_READABLE (1 << 1) +/* writable mapping */ +#define AMDGPU_VM_PAGE_WRITEABLE (1 << 2) +/* executable mapping, new for VI */ +#define AMDGPU_VM_PAGE_EXECUTABLE (1 << 3) +/* partially resident texture */ +#define AMDGPU_VM_PAGE_PRT (1 << 4) +/* MTYPE flags use bit 5 to 8 */ +#define AMDGPU_VM_MTYPE_MASK (0xf << 5) +/* Default MTYPE. Pre-AI must use this. Recommended for newer ASICs. 
*/ +#define AMDGPU_VM_MTYPE_DEFAULT (0 << 5) +/* Use Non Coherent MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_NC (1 << 5) +/* Use Write Combine MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_WC (2 << 5) +/* Use Cache Coherent MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_CC (3 << 5) +/* Use UnCached MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_UC (4 << 5) +/* Use Read Write MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_RW (5 << 5) +/* don't allocate MALL */ +#define AMDGPU_VM_PAGE_NOALLOC (1 << 9) + +struct drm_amdgpu_gem_va { + /** GEM object handle */ + __u32 handle; + __u32 _pad; + /** AMDGPU_VA_OP_* */ + __u32 operation; + /** AMDGPU_VM_PAGE_* */ + __u32 flags; + /** va address to assign . Must be correctly aligned.*/ + __u64 va_address; + /** Specify offset inside of BO to assign. Must be correctly aligned.*/ + __u64 offset_in_bo; + /** Specify mapping size. Must be correctly aligned. */ + __u64 map_size; + /** + * vm_timeline_point is a sequence number used to add new timeline point. + */ + __u64 vm_timeline_point; + /** + * The vm page table update fence is installed in given vm_timeline_syncobj_out + * at vm_timeline_point. + */ + __u32 vm_timeline_syncobj_out; + /** the number of syncobj handles in @input_fence_syncobj_handles */ + __u32 num_syncobj_handles; + /** Array of sync object handle to wait for given input fences */ + __u64 input_fence_syncobj_handles; +}; + +#define AMDGPU_HW_IP_GFX 0 +#define AMDGPU_HW_IP_COMPUTE 1 +#define AMDGPU_HW_IP_DMA 2 +#define AMDGPU_HW_IP_UVD 3 +#define AMDGPU_HW_IP_VCE 4 +#define AMDGPU_HW_IP_UVD_ENC 5 +#define AMDGPU_HW_IP_VCN_DEC 6 +/* + * From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support + * both encoding and decoding jobs. + */ +#define AMDGPU_HW_IP_VCN_ENC 7 +#define AMDGPU_HW_IP_VCN_JPEG 8 +#define AMDGPU_HW_IP_VPE 9 +#define AMDGPU_HW_IP_NUM 10 + +#define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1 + +#define AMDGPU_CHUNK_ID_IB 0x01 +#define AMDGPU_CHUNK_ID_FENCE 0x02 +#define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 +#define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04 +#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05 +#define AMDGPU_CHUNK_ID_BO_HANDLES 0x06 +#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07 +#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT 0x08 +#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL 0x09 +#define AMDGPU_CHUNK_ID_CP_GFX_SHADOW 0x0a + +struct drm_amdgpu_cs_chunk { + __u32 chunk_id; + __u32 length_dw; + __u64 chunk_data; +}; + +struct drm_amdgpu_cs_in { + /** Rendering context id */ + __u32 ctx_id; + /** Handle of resource list associated with CS */ + __u32 bo_list_handle; + __u32 num_chunks; + __u32 flags; + /** this points to __u64 * which point to cs chunks */ + __u64 chunks; +}; + +struct drm_amdgpu_cs_out { + __u64 handle; +}; + +union drm_amdgpu_cs { + struct drm_amdgpu_cs_in in; + struct drm_amdgpu_cs_out out; +}; + +/* Specify flags to be used for IB */ + +/* This IB should be submitted to CE */ +#define AMDGPU_IB_FLAG_CE (1<<0) + +/* Preamble flag, which means the IB could be dropped if no context switch */ +#define AMDGPU_IB_FLAG_PREAMBLE (1<<1) + +/* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */ +#define AMDGPU_IB_FLAG_PREEMPT (1<<2) + +/* The IB fence should do the L2 writeback but not invalidate any shader + * caches (L2/vL1/sL1/I$). */ +#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3) + +/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER. + * This will reset wave ID counters for the IB. 
+ */ +#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4) + +/* Flag the IB as secure (TMZ) + */ +#define AMDGPU_IB_FLAGS_SECURE (1 << 5) + +/* Tell KMD to flush and invalidate caches + */ +#define AMDGPU_IB_FLAG_EMIT_MEM_SYNC (1 << 6) + +struct drm_amdgpu_cs_chunk_ib { + __u32 _pad; + /** AMDGPU_IB_FLAG_* */ + __u32 flags; + /** Virtual address to begin IB execution */ + __u64 va_start; + /** Size of submission */ + __u32 ib_bytes; + /** HW IP to submit to */ + __u32 ip_type; + /** HW IP index of the same type to submit to */ + __u32 ip_instance; + /** Ring index to submit to */ + __u32 ring; +}; + +struct drm_amdgpu_cs_chunk_dep { + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u32 ctx_id; + __u64 handle; +}; + +struct drm_amdgpu_cs_chunk_fence { + __u32 handle; + __u32 offset; +}; + +struct drm_amdgpu_cs_chunk_sem { + __u32 handle; +}; + +struct drm_amdgpu_cs_chunk_syncobj { + __u32 handle; + __u32 flags; + __u64 point; +}; + +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ 0 +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD 1 +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD 2 + +union drm_amdgpu_fence_to_handle { + struct { + struct drm_amdgpu_fence fence; + __u32 what; + __u32 pad; + } in; + struct { + __u32 handle; + } out; +}; + +struct drm_amdgpu_cs_chunk_data { + union { + struct drm_amdgpu_cs_chunk_ib ib_data; + struct drm_amdgpu_cs_chunk_fence fence_data; + }; +}; + +#define AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW 0x1 + +struct drm_amdgpu_cs_chunk_cp_gfx_shadow { + __u64 shadow_va; + __u64 csa_va; + __u64 gds_va; + __u64 flags; +}; + +/* + * Query h/w info: Flag that this is integrated (a.h.a. fusion) GPU + * + */ +#define AMDGPU_IDS_FLAGS_FUSION 0x1 +#define AMDGPU_IDS_FLAGS_PREEMPTION 0x2 +#define AMDGPU_IDS_FLAGS_TMZ 0x4 +#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x8 + +/* indicate if acceleration can be working */ +#define AMDGPU_INFO_ACCEL_WORKING 0x00 +/* get the crtc_id from the mode object id? 
*/ +#define AMDGPU_INFO_CRTC_FROM_ID 0x01 +/* query hw IP info */ +#define AMDGPU_INFO_HW_IP_INFO 0x02 +/* query hw IP instance count for the specified type */ +#define AMDGPU_INFO_HW_IP_COUNT 0x03 +/* timestamp for GL_ARB_timer_query */ +#define AMDGPU_INFO_TIMESTAMP 0x05 +/* Query the firmware version */ +#define AMDGPU_INFO_FW_VERSION 0x0e + /* Subquery id: Query VCE firmware version */ + #define AMDGPU_INFO_FW_VCE 0x1 + /* Subquery id: Query UVD firmware version */ + #define AMDGPU_INFO_FW_UVD 0x2 + /* Subquery id: Query GMC firmware version */ + #define AMDGPU_INFO_FW_GMC 0x03 + /* Subquery id: Query GFX ME firmware version */ + #define AMDGPU_INFO_FW_GFX_ME 0x04 + /* Subquery id: Query GFX PFP firmware version */ + #define AMDGPU_INFO_FW_GFX_PFP 0x05 + /* Subquery id: Query GFX CE firmware version */ + #define AMDGPU_INFO_FW_GFX_CE 0x06 + /* Subquery id: Query GFX RLC firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC 0x07 + /* Subquery id: Query GFX MEC firmware version */ + #define AMDGPU_INFO_FW_GFX_MEC 0x08 + /* Subquery id: Query SMC firmware version */ + #define AMDGPU_INFO_FW_SMC 0x0a + /* Subquery id: Query SDMA firmware version */ + #define AMDGPU_INFO_FW_SDMA 0x0b + /* Subquery id: Query PSP SOS firmware version */ + #define AMDGPU_INFO_FW_SOS 0x0c + /* Subquery id: Query PSP ASD firmware version */ + #define AMDGPU_INFO_FW_ASD 0x0d + /* Subquery id: Query VCN firmware version */ + #define AMDGPU_INFO_FW_VCN 0x0e + /* Subquery id: Query GFX RLC SRLC firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL 0x0f + /* Subquery id: Query GFX RLC SRLG firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM 0x10 + /* Subquery id: Query GFX RLC SRLS firmware version */ + #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11 + /* Subquery id: Query DMCU firmware version */ + #define AMDGPU_INFO_FW_DMCU 0x12 + #define AMDGPU_INFO_FW_TA 0x13 + /* Subquery id: Query DMCUB firmware version */ + #define AMDGPU_INFO_FW_DMCUB 0x14 + /* Subquery id: Query TOC firmware version */ + #define AMDGPU_INFO_FW_TOC 0x15 + /* Subquery id: Query CAP firmware version */ + #define AMDGPU_INFO_FW_CAP 0x16 + /* Subquery id: Query GFX RLCP firmware version */ + #define AMDGPU_INFO_FW_GFX_RLCP 0x17 + /* Subquery id: Query GFX RLCV firmware version */ + #define AMDGPU_INFO_FW_GFX_RLCV 0x18 + /* Subquery id: Query MES_KIQ firmware version */ + #define AMDGPU_INFO_FW_MES_KIQ 0x19 + /* Subquery id: Query MES firmware version */ + #define AMDGPU_INFO_FW_MES 0x1a + /* Subquery id: Query IMU firmware version */ + #define AMDGPU_INFO_FW_IMU 0x1b + /* Subquery id: Query VPE firmware version */ + #define AMDGPU_INFO_FW_VPE 0x1c + +/* number of bytes moved for TTM migration */ +#define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f +/* the used VRAM size */ +#define AMDGPU_INFO_VRAM_USAGE 0x10 +/* the used GTT size */ +#define AMDGPU_INFO_GTT_USAGE 0x11 +/* Information about GDS, etc. resource configuration */ +#define AMDGPU_INFO_GDS_CONFIG 0x13 +/* Query information about VRAM and GTT domains */ +#define AMDGPU_INFO_VRAM_GTT 0x14 +/* Query information about register in MMR address space*/ +#define AMDGPU_INFO_READ_MMR_REG 0x15 +/* Query information about device: rev id, family, etc. 
*/ +#define AMDGPU_INFO_DEV_INFO 0x16 +/* visible vram usage */ +#define AMDGPU_INFO_VIS_VRAM_USAGE 0x17 +/* number of TTM buffer evictions */ +#define AMDGPU_INFO_NUM_EVICTIONS 0x18 +/* Query memory about VRAM and GTT domains */ +#define AMDGPU_INFO_MEMORY 0x19 +/* Query vce clock table */ +#define AMDGPU_INFO_VCE_CLOCK_TABLE 0x1A +/* Query vbios related information */ +#define AMDGPU_INFO_VBIOS 0x1B + /* Subquery id: Query vbios size */ + #define AMDGPU_INFO_VBIOS_SIZE 0x1 + /* Subquery id: Query vbios image */ + #define AMDGPU_INFO_VBIOS_IMAGE 0x2 + /* Subquery id: Query vbios info */ + #define AMDGPU_INFO_VBIOS_INFO 0x3 +/* Query UVD handles */ +#define AMDGPU_INFO_NUM_HANDLES 0x1C +/* Query sensor related information */ +#define AMDGPU_INFO_SENSOR 0x1D + /* Subquery id: Query GPU shader clock */ + #define AMDGPU_INFO_SENSOR_GFX_SCLK 0x1 + /* Subquery id: Query GPU memory clock */ + #define AMDGPU_INFO_SENSOR_GFX_MCLK 0x2 + /* Subquery id: Query GPU temperature */ + #define AMDGPU_INFO_SENSOR_GPU_TEMP 0x3 + /* Subquery id: Query GPU load */ + #define AMDGPU_INFO_SENSOR_GPU_LOAD 0x4 + /* Subquery id: Query average GPU power */ + #define AMDGPU_INFO_SENSOR_GPU_AVG_POWER 0x5 + /* Subquery id: Query northbridge voltage */ + #define AMDGPU_INFO_SENSOR_VDDNB 0x6 + /* Subquery id: Query graphics voltage */ + #define AMDGPU_INFO_SENSOR_VDDGFX 0x7 + /* Subquery id: Query GPU stable pstate shader clock */ + #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK 0x8 + /* Subquery id: Query GPU stable pstate memory clock */ + #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK 0x9 + /* Subquery id: Query GPU peak pstate shader clock */ + #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK 0xa + /* Subquery id: Query GPU peak pstate memory clock */ + #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK 0xb + /* Subquery id: Query input GPU power */ + #define AMDGPU_INFO_SENSOR_GPU_INPUT_POWER 0xc +/* Number of VRAM page faults on CPU access. 
*/ +#define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E +#define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F +/* query ras mask of enabled features*/ +#define AMDGPU_INFO_RAS_ENABLED_FEATURES 0x20 +/* RAS MASK: UMC (VRAM) */ +#define AMDGPU_INFO_RAS_ENABLED_UMC (1 << 0) +/* RAS MASK: SDMA */ +#define AMDGPU_INFO_RAS_ENABLED_SDMA (1 << 1) +/* RAS MASK: GFX */ +#define AMDGPU_INFO_RAS_ENABLED_GFX (1 << 2) +/* RAS MASK: MMHUB */ +#define AMDGPU_INFO_RAS_ENABLED_MMHUB (1 << 3) +/* RAS MASK: ATHUB */ +#define AMDGPU_INFO_RAS_ENABLED_ATHUB (1 << 4) +/* RAS MASK: PCIE */ +#define AMDGPU_INFO_RAS_ENABLED_PCIE (1 << 5) +/* RAS MASK: HDP */ +#define AMDGPU_INFO_RAS_ENABLED_HDP (1 << 6) +/* RAS MASK: XGMI */ +#define AMDGPU_INFO_RAS_ENABLED_XGMI (1 << 7) +/* RAS MASK: DF */ +#define AMDGPU_INFO_RAS_ENABLED_DF (1 << 8) +/* RAS MASK: SMN */ +#define AMDGPU_INFO_RAS_ENABLED_SMN (1 << 9) +/* RAS MASK: SEM */ +#define AMDGPU_INFO_RAS_ENABLED_SEM (1 << 10) +/* RAS MASK: MP0 */ +#define AMDGPU_INFO_RAS_ENABLED_MP0 (1 << 11) +/* RAS MASK: MP1 */ +#define AMDGPU_INFO_RAS_ENABLED_MP1 (1 << 12) +/* RAS MASK: FUSE */ +#define AMDGPU_INFO_RAS_ENABLED_FUSE (1 << 13) +/* query video encode/decode caps */ +#define AMDGPU_INFO_VIDEO_CAPS 0x21 + /* Subquery id: Decode */ + #define AMDGPU_INFO_VIDEO_CAPS_DECODE 0 + /* Subquery id: Encode */ + #define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1 +/* Query the max number of IBs per gang per submission */ +#define AMDGPU_INFO_MAX_IBS 0x22 +/* query last page fault info */ +#define AMDGPU_INFO_GPUVM_FAULT 0x23 +/* query FW object size and alignment */ +#define AMDGPU_INFO_UQ_FW_AREAS 0x24 + +#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 +#define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff +#define AMDGPU_INFO_MMR_SH_INDEX_SHIFT 8 +#define AMDGPU_INFO_MMR_SH_INDEX_MASK 0xff + +struct drm_amdgpu_query_fw { + /** AMDGPU_INFO_FW_* */ + __u32 fw_type; + /** + * Index of the IP if there are more IPs of + * the same type. + */ + __u32 ip_instance; + /** + * Index of the engine. Whether this is used depends + * on the firmware type. (e.g. MEC, SDMA) + */ + __u32 index; + __u32 _pad; +}; + +/* Input structure for the INFO ioctl */ +struct drm_amdgpu_info { + /* Where the return value will be stored */ + __u64 return_pointer; + /* The size of the return value. Just like "size" in "snprintf", + * it limits how many bytes the kernel can write. */ + __u32 return_size; + /* The query request id. */ + __u32 query; + + union { + struct { + __u32 id; + __u32 _pad; + } mode_crtc; + + struct { + /** AMDGPU_HW_IP_* */ + __u32 type; + /** + * Index of the IP if there are more IPs of the same + * type. Ignored by AMDGPU_INFO_HW_IP_COUNT. 
+ */ + __u32 ip_instance; + } query_hw_ip; + + struct { + __u32 dword_offset; + /** number of registers to read */ + __u32 count; + __u32 instance; + /** For future use, no flags defined so far */ + __u32 flags; + } read_mmr_reg; + + struct drm_amdgpu_query_fw query_fw; + + struct { + __u32 type; + __u32 offset; + } vbios_info; + + struct { + __u32 type; + } sensor_info; + + struct { + __u32 type; + } video_cap; + }; +}; + +struct drm_amdgpu_info_gds { + /** GDS GFX partition size */ + __u32 gds_gfx_partition_size; + /** GDS compute partition size */ + __u32 compute_partition_size; + /** total GDS memory size */ + __u32 gds_total_size; + /** GWS size per GFX partition */ + __u32 gws_per_gfx_partition; + /** GSW size per compute partition */ + __u32 gws_per_compute_partition; + /** OA size per GFX partition */ + __u32 oa_per_gfx_partition; + /** OA size per compute partition */ + __u32 oa_per_compute_partition; + __u32 _pad; +}; + +struct drm_amdgpu_info_vram_gtt { + __u64 vram_size; + __u64 vram_cpu_accessible_size; + __u64 gtt_size; +}; + +struct drm_amdgpu_heap_info { + /** max. physical memory */ + __u64 total_heap_size; + + /** Theoretical max. available memory in the given heap */ + __u64 usable_heap_size; + + /** + * Number of bytes allocated in the heap. This includes all processes + * and private allocations in the kernel. It changes when new buffers + * are allocated, freed, and moved. It cannot be larger than + * heap_size. + */ + __u64 heap_usage; + + /** + * Theoretical possible max. size of buffer which + * could be allocated in the given heap + */ + __u64 max_allocation; +}; + +struct drm_amdgpu_memory_info { + struct drm_amdgpu_heap_info vram; + struct drm_amdgpu_heap_info cpu_accessible_vram; + struct drm_amdgpu_heap_info gtt; +}; + +struct drm_amdgpu_info_firmware { + __u32 ver; + __u32 feature; +}; + +struct drm_amdgpu_info_vbios { + __u8 name[64]; + __u8 vbios_pn[64]; + __u32 version; + __u32 pad; + __u8 vbios_ver_str[32]; + __u8 date[32]; +}; + +#define AMDGPU_VRAM_TYPE_UNKNOWN 0 +#define AMDGPU_VRAM_TYPE_GDDR1 1 +#define AMDGPU_VRAM_TYPE_DDR2 2 +#define AMDGPU_VRAM_TYPE_GDDR3 3 +#define AMDGPU_VRAM_TYPE_GDDR4 4 +#define AMDGPU_VRAM_TYPE_GDDR5 5 +#define AMDGPU_VRAM_TYPE_HBM 6 +#define AMDGPU_VRAM_TYPE_DDR3 7 +#define AMDGPU_VRAM_TYPE_DDR4 8 +#define AMDGPU_VRAM_TYPE_GDDR6 9 +#define AMDGPU_VRAM_TYPE_DDR5 10 +#define AMDGPU_VRAM_TYPE_LPDDR4 11 +#define AMDGPU_VRAM_TYPE_LPDDR5 12 + +struct drm_amdgpu_info_device { + /** PCI Device ID */ + __u32 device_id; + /** Internal chip revision: A0, A1, etc.) */ + __u32 chip_rev; + __u32 external_rev; + /** Revision id in PCI Config space */ + __u32 pci_rev; + __u32 family; + __u32 num_shader_engines; + __u32 num_shader_arrays_per_engine; + /* in KHz */ + __u32 gpu_counter_freq; + __u64 max_engine_clock; + __u64 max_memory_clock; + /* cu information */ + __u32 cu_active_number; + /* NOTE: cu_ao_mask is INVALID, DON'T use it */ + __u32 cu_ao_mask; + __u32 cu_bitmap[4][4]; + /** Render backend pipe mask. One render backend is CB+DB. */ + __u32 enabled_rb_pipes_mask; + __u32 num_rb_pipes; + __u32 num_hw_gfx_contexts; + /* PCIe version (the smaller of the GPU and the CPU/motherboard) */ + __u32 pcie_gen; + __u64 ids_flags; + /** Starting virtual address for UMDs. */ + __u64 virtual_address_offset; + /** The maximum virtual address */ + __u64 virtual_address_max; + /** Required alignment of virtual addresses. 
*/ + __u32 virtual_address_alignment; + /** Page table entry - fragment size */ + __u32 pte_fragment_size; + __u32 gart_page_size; + /** constant engine ram size*/ + __u32 ce_ram_size; + /** video memory type info*/ + __u32 vram_type; + /** video memory bit width*/ + __u32 vram_bit_width; + /* vce harvesting instance */ + __u32 vce_harvest_config; + /* gfx double offchip LDS buffers */ + __u32 gc_double_offchip_lds_buf; + /* NGG Primitive Buffer */ + __u64 prim_buf_gpu_addr; + /* NGG Position Buffer */ + __u64 pos_buf_gpu_addr; + /* NGG Control Sideband */ + __u64 cntl_sb_buf_gpu_addr; + /* NGG Parameter Cache */ + __u64 param_buf_gpu_addr; + __u32 prim_buf_size; + __u32 pos_buf_size; + __u32 cntl_sb_buf_size; + __u32 param_buf_size; + /* wavefront size*/ + __u32 wave_front_size; + /* shader visible vgprs*/ + __u32 num_shader_visible_vgprs; + /* CU per shader array*/ + __u32 num_cu_per_sh; + /* number of tcc blocks*/ + __u32 num_tcc_blocks; + /* gs vgt table depth*/ + __u32 gs_vgt_table_depth; + /* gs primitive buffer depth*/ + __u32 gs_prim_buffer_depth; + /* max gs wavefront per vgt*/ + __u32 max_gs_waves_per_vgt; + /* PCIe number of lanes (the smaller of the GPU and the CPU/motherboard) */ + __u32 pcie_num_lanes; + /* always on cu bitmap */ + __u32 cu_ao_bitmap[4][4]; + /** Starting high virtual address for UMDs. */ + __u64 high_va_offset; + /** The maximum high virtual address */ + __u64 high_va_max; + /* gfx10 pa_sc_tile_steering_override */ + __u32 pa_sc_tile_steering_override; + /* disabled TCCs */ + __u64 tcc_disabled_mask; + __u64 min_engine_clock; + __u64 min_memory_clock; + /* The following fields are only set on gfx11+, older chips set 0. */ + __u32 tcp_cache_size; /* AKA GL0, VMEM cache */ + __u32 num_sqc_per_wgp; + __u32 sqc_data_cache_size; /* AKA SMEM cache */ + __u32 sqc_inst_cache_size; + __u32 gl1c_cache_size; + __u32 gl2c_cache_size; + __u64 mall_size; /* AKA infinity cache */ + /* high 32 bits of the rb pipes mask */ + __u32 enabled_rb_pipes_mask_hi; + /* shadow area size for gfx11 */ + __u32 shadow_size; + /* shadow area base virtual alignment for gfx11 */ + __u32 shadow_alignment; + /* context save area size for gfx11 */ + __u32 csa_size; + /* context save area base virtual alignment for gfx11 */ + __u32 csa_alignment; +}; + +struct drm_amdgpu_info_hw_ip { + /** Version of h/w IP */ + __u32 hw_ip_version_major; + __u32 hw_ip_version_minor; + /** Capabilities */ + __u64 capabilities_flags; + /** command buffer address start alignment*/ + __u32 ib_start_alignment; + /** command buffer size alignment*/ + __u32 ib_size_alignment; + /** Bitmask of available rings. Bit 0 means ring 0, etc. 
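Note that the tcp/sqc/gl1c/gl2c/mall fields above are only populated on gfx11 and newer, so callers should treat zero as "not reported" rather than "no cache". A small sketch that fetches the struct and prints two of them; it assumes AMDGPU_INFO_DEV_INFO is defined earlier in this header, and the function name is illustrative:

#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>

static int print_cache_sizes(int fd)
{
	struct drm_amdgpu_info_device dev = { 0 };
	struct drm_amdgpu_info req = { 0 };

	req.return_pointer = (uintptr_t)&dev;
	req.return_size = sizeof(dev);
	req.query = AMDGPU_INFO_DEV_INFO;

	if (ioctl(fd, DRM_IOCTL_AMDGPU_INFO, &req))
		return -1;

	printf("GL2C cache: %u bytes, MALL: %llu bytes\n",
	       dev.gl2c_cache_size, (unsigned long long)dev.mall_size);
	return 0;
}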
*/ + __u32 available_rings; + /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */ + __u32 ip_discovery_version; +}; + +/* GFX metadata BO sizes and alignment info (in bytes) */ +struct drm_amdgpu_info_uq_fw_areas_gfx { + /* shadow area size */ + __u32 shadow_size; + /* shadow area base virtual mem alignment */ + __u32 shadow_alignment; + /* context save area size */ + __u32 csa_size; + /* context save area base virtual mem alignment */ + __u32 csa_alignment; +}; + +/* IP specific fw related information used in the + * subquery AMDGPU_INFO_UQ_FW_AREAS + */ +struct drm_amdgpu_info_uq_fw_areas { + union { + struct drm_amdgpu_info_uq_fw_areas_gfx gfx; + }; +}; + +struct drm_amdgpu_info_num_handles { + /** Max handles as supported by firmware for UVD */ + __u32 uvd_max_handles; + /** Handles currently in use for UVD */ + __u32 uvd_used_handles; +}; + +#define AMDGPU_VCE_CLOCK_TABLE_ENTRIES 6 + +struct drm_amdgpu_info_vce_clock_table_entry { + /** System clock */ + __u32 sclk; + /** Memory clock */ + __u32 mclk; + /** VCE clock */ + __u32 eclk; + __u32 pad; +}; + +struct drm_amdgpu_info_vce_clock_table { + struct drm_amdgpu_info_vce_clock_table_entry entries[AMDGPU_VCE_CLOCK_TABLE_ENTRIES]; + __u32 num_valid_entries; + __u32 pad; +}; + +/* query video encode/decode caps */ +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2 0 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4 1 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1 2 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC 3 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC 4 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG 5 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9 6 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1 7 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT 8 + +struct drm_amdgpu_info_video_codec_info { + __u32 valid; + __u32 max_width; + __u32 max_height; + __u32 max_pixels_per_frame; + __u32 max_level; + __u32 pad; +}; + +struct drm_amdgpu_info_video_caps { + struct drm_amdgpu_info_video_codec_info codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT]; +}; + +#define AMDGPU_VMHUB_TYPE_MASK 0xff +#define AMDGPU_VMHUB_TYPE_SHIFT 0 +#define AMDGPU_VMHUB_TYPE_GFX 0 +#define AMDGPU_VMHUB_TYPE_MM0 1 +#define AMDGPU_VMHUB_TYPE_MM1 2 +#define AMDGPU_VMHUB_IDX_MASK 0xff00 +#define AMDGPU_VMHUB_IDX_SHIFT 8 + +struct drm_amdgpu_info_gpuvm_fault { + __u64 addr; + __u32 status; + __u32 vmhub; +}; + +struct drm_amdgpu_info_uq_metadata_gfx { + /* shadow area size for gfx11 */ + __u32 shadow_size; + /* shadow area base virtual alignment for gfx11 */ + __u32 shadow_alignment; + /* context save area size for gfx11 */ + __u32 csa_size; + /* context save area base virtual alignment for gfx11 */ + __u32 csa_alignment; +}; + +struct drm_amdgpu_info_uq_metadata { + union { + struct drm_amdgpu_info_uq_metadata_gfx gfx; + }; +}; + +/* + * Supported GPU families + */ +#define AMDGPU_FAMILY_UNKNOWN 0 +#define AMDGPU_FAMILY_SI 110 /* Hainan, Oland, Verde, Pitcairn, Tahiti */ +#define AMDGPU_FAMILY_CI 120 /* Bonaire, Hawaii */ +#define AMDGPU_FAMILY_KV 125 /* Kaveri, Kabini, Mullins */ +#define AMDGPU_FAMILY_VI 130 /* Iceland, Tonga */ +#define AMDGPU_FAMILY_CZ 135 /* Carrizo, Stoney */ +#define AMDGPU_FAMILY_AI 141 /* Vega10 */ +#define AMDGPU_FAMILY_RV 142 /* Raven */ +#define AMDGPU_FAMILY_NV 143 /* Navi10 */ +#define AMDGPU_FAMILY_VGH 144 /* Van Gogh */ +#define AMDGPU_FAMILY_GC_11_0_0 145 /* GC 11.0.0 */ +#define AMDGPU_FAMILY_YC 146 /* Yellow Carp */ +#define AMDGPU_FAMILY_GC_11_0_1 148 /* GC 11.0.1 */ +#define AMDGPU_FAMILY_GC_10_3_6 149 /* GC 10.3.6 */ 
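Two of the packed fields above deserve a decode example, since the masks and shifts are easy to transpose: ip_discovery_version packs major/minor/revision, and the vmhub word from drm_amdgpu_info_gpuvm_fault packs a hub type plus instance index. A small sketch using only the defines above:

#include <stdint.h>

/* version info layout: bits 23:16 major, 15:8 minor, 7:0 revision */
static void decode_ip_discovery(uint32_t v, uint32_t *maj, uint32_t *min, uint32_t *rev)
{
	*maj = (v >> 16) & 0xff;
	*min = (v >> 8) & 0xff;
	*rev = v & 0xff;
}

/* Split drm_amdgpu_info_gpuvm_fault::vmhub into hub type and index. */
static void decode_vmhub(uint32_t vmhub, uint32_t *type, uint32_t *idx)
{
	*type = (vmhub & AMDGPU_VMHUB_TYPE_MASK) >> AMDGPU_VMHUB_TYPE_SHIFT;
	*idx = (vmhub & AMDGPU_VMHUB_IDX_MASK) >> AMDGPU_VMHUB_IDX_SHIFT;
}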
+#define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ +#define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */ +#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */ + +/* FIXME wrong namespace! */ +struct drm_color_ctm_3x4 { + /* + * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude + * (not two's complement!) format. + */ + __u64 matrix[12]; +}; + +/* CRIU ioctl + * + * When checkpointing a process, the CRIU amdgpu plugin will perform: + * 1. INFO op to get information about state that needs to be saved. This + * pauses execution until the checkpoint is done. + * 2. CHECKPOINT op to save state (BOs for now, TODO: CS contexts) + * 3. UNPAUSE op to resume execution when the checkpoint is done. + * + * When restoring a process, the CRIU amdgpu plugin will perform: + * + * 1. RESTORE op to restore state + * 2. RESUME op to restore userptr mappings (TODO) + */ +enum drm_amdgpu_criu_op { + AMDGPU_CRIU_OP_PROCESS_INFO, + AMDGPU_CRIU_OP_CHECKPOINT, + AMDGPU_CRIU_OP_UNPAUSE, + AMDGPU_CRIU_OP_RESTORE, + AMDGPU_CRIU_OP_RESTORE_RETRY, + AMDGPU_CRIU_OP_RESUME, +}; + +struct drm_amdgpu_criu_args { + __u64 bos; /* user pointer to bos array */ + __u64 priv_data; /* user pointer to private data */ + __u64 priv_data_size; + __u32 num_bos; + __u32 num_objs; + __u32 pid; + __u32 op; +}; + +#define AMDGPU_CRIU_BO_FLAG_IS_IMPORT (1 << 0) +#define AMDGPU_CRIU_BO_FLAG_SKIP (1 << 1) + +struct drm_amdgpu_criu_bo_bucket { + __u64 addr; + __u64 size; + __u64 offset; + __u64 restored_offset; /* During restore, updated offset for BO */ + __u64 alloc_flags; + __u32 preferred_domains; + __u32 dmabuf_fd; + __u32 flags; +}; + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/plugins/amdgpu/amdgpu_plugin.c b/plugins/amdgpu/amdgpu_plugin.c index 96c0861628..9a4a8d7915 100644 --- a/plugins/amdgpu/amdgpu_plugin.c +++ b/plugins/amdgpu/amdgpu_plugin.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include @@ -23,13 +25,17 @@ #include "criu-plugin.h" #include "plugin.h" #include "criu-amdgpu.pb-c.h" +#include "util.h" +#include "util-pie.h" #include "kfd_ioctl.h" #include "xmalloc.h" #include "criu-log.h" #include "files.h" +#include "sockets.h" #include "common/list.h" +#include "amdgpu_plugin_dmabuf.h" #include "amdgpu_plugin_drm.h" #include "amdgpu_plugin_util.h" #include "amdgpu_plugin_topology.h" @@ -37,7 +43,7 @@ #include "img-streamer.h" #include "image.h" #include "cr_options.h" - +#include "util.h" struct vma_metadata { struct list_head list; uint64_t old_pgoff; @@ -49,21 +55,20 @@ struct vma_metadata { /************************************ Global Variables ********************************************/ -/** - * FD of KFD device used to checkpoint. 
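The checkpoint half of the sequence described in the comment above, driven from userspace. DRM_IOCTL_AMDGPU_CRIU_OP is assumed to be the ioctl number this uapi block defines for drm_amdgpu_criu_args (the plugin code later in this patch invokes it by that name); error handling and image writing are abbreviated:

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>

static int checkpoint_sequence(int drm_fd)
{
	struct drm_amdgpu_criu_args args = { 0 };

	/* 1. Query counts; this also pauses the process's execution. */
	args.op = AMDGPU_CRIU_OP_PROCESS_INFO;
	if (ioctl(drm_fd, DRM_IOCTL_AMDGPU_CRIU_OP, &args))
		return -1;

	/* 2. Collect BO buckets and driver-private data. */
	args.bos = (uintptr_t)calloc(args.num_bos, sizeof(struct drm_amdgpu_criu_bo_bucket));
	args.priv_data = (uintptr_t)malloc(args.priv_data_size);
	if (!args.bos || !args.priv_data)
		return -1;
	args.op = AMDGPU_CRIU_OP_CHECKPOINT;
	if (ioctl(drm_fd, DRM_IOCTL_AMDGPU_CRIU_OP, &args))
		return -1;

	/* ... write the buckets and priv_data blob to the image ... */

	/* 3. Let the process run again once the checkpoint is done. */
	args.op = AMDGPU_CRIU_OP_UNPAUSE;
	return ioctl(drm_fd, DRM_IOCTL_AMDGPU_CRIU_OP, &args);
}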
On a multi-process - * tree the order of checkpointing goes from parent to child - * and so on - so saving the FD will not be overwritten - */ -static int kfd_checkpoint_fd; - static LIST_HEAD(update_vma_info_list); +static LIST_HEAD(amdgpu_processes); + size_t kfd_max_buffer_size; bool plugin_added_to_inventory = false; bool plugin_disabled = false; +int dmabuf_socket_fd = -1; + +int current_pid; + /**************************************************************************************************/ /* Call ioctl, restarting if it is interrupted */ @@ -503,11 +508,11 @@ void free_and_unmap(uint64_t size, amdgpu_bo_handle h_bo, amdgpu_va_handle h_va, amdgpu_bo_free(h_bo); } -static int sdma_copy_bo(struct kfd_criu_bo_bucket bo_bucket, FILE *storage_fp, +int sdma_copy_bo(int shared_fd, uint64_t size, FILE *storage_fp, void *buffer, size_t buffer_size, amdgpu_device_handle h_dev, - uint64_t max_copy_size, enum sdma_op_type type) + uint64_t max_copy_size, enum sdma_op_type type, bool do_not_free) { - uint64_t size, src_bo_size, dst_bo_size, buffer_bo_size, bytes_remain, buffer_space_remain; + uint64_t src_bo_size, dst_bo_size, buffer_bo_size, bytes_remain, buffer_space_remain; uint64_t gpu_addr_src, gpu_addr_dst, gpu_addr_ib, copy_src, copy_dst, copy_size; amdgpu_va_handle h_va_src, h_va_dst, h_va_ib; amdgpu_bo_handle h_bo_src, h_bo_dst, h_bo_ib; @@ -520,10 +525,8 @@ static int sdma_copy_bo(struct kfd_criu_bo_bucket bo_bucket, FILE *storage_fp, uint32_t expired; amdgpu_context_handle h_ctx; uint32_t *ib = NULL; - int j, err, shared_fd, packets_per_buffer; + int j, err, packets_per_buffer; - shared_fd = bo_bucket.dmabuf_fd; - size = bo_bucket.size; buffer_bo_size = min(size, buffer_size); packets_per_buffer = ((buffer_bo_size - 1) / max_copy_size) + 1; src_bo_size = (type == SDMA_OP_VRAM_WRITE) ? 
buffer_bo_size : size; @@ -734,7 +737,8 @@ static int sdma_copy_bo(struct kfd_criu_bo_bucket bo_bucket, FILE *storage_fp, if (err) pr_perror("dest range free failed"); err_dst_va: - err = amdgpu_bo_free(h_bo_dst); + if (!do_not_free) + err = amdgpu_bo_free(h_bo_dst); if (err) pr_perror("dest bo free failed"); err_dst_bo_prep: @@ -822,8 +826,9 @@ void *dump_bo_contents(void *_thread_data) num_bos++; /* perform sDMA based vram copy */ - ret = sdma_copy_bo(bo_buckets[i], bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, - SDMA_OP_VRAM_READ); + ret = sdma_copy_bo(bo_buckets[i].dmabuf_fd, bo_buckets[i].size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, + SDMA_OP_VRAM_READ, false); + if (ret) { pr_err("Failed to drain the BO using sDMA: bo_buckets[%d]\n", i); break; @@ -920,8 +925,8 @@ void *restore_bo_contents(void *_thread_data) num_bos++; - ret = sdma_copy_bo(bo_buckets[i], bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, - SDMA_OP_VRAM_WRITE); + ret = sdma_copy_bo(bo_buckets[i].dmabuf_fd, bo_buckets[i].size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, + SDMA_OP_VRAM_WRITE, false); if (ret) { pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i); break; @@ -1007,28 +1012,157 @@ int restore_hsakmt_shared_mem(const uint64_t shared_mem_size, const uint32_t sha return 0; } -static int unpause_process(int fd) +int amdgpu_unpause_processes(int pid) { int ret = 0; struct kfd_ioctl_criu_args args = { 0 }; + struct list_head *l = get_dumped_fds(); + struct dumped_fd *st; + + list_for_each_entry(st, l, l) { + if (st->is_drm) { + ret = amdgpu_plugin_drm_unpause_file(st->fd); + if (ret) { + pr_perror("Failed to unpause drm device file"); + goto exit; + } + close(st->fd); + } else { + args.op = KFD_CRIU_OP_UNPAUSE; - args.op = KFD_CRIU_OP_UNPAUSE; - - ret = kmtIoctl(fd, AMDKFD_IOC_CRIU_OP, &args); - if (ret) { - pr_perror("Failed to unpause process"); - goto exit; + ret = kmtIoctl(st->fd, AMDKFD_IOC_CRIU_OP, &args); + if (ret) { + pr_perror("Failed to unpause process"); + goto exit; + } + } } - // Reset the KFD FD - kfd_checkpoint_fd = -1; - sys_close_drm_render_devices(&src_topology); - exit: pr_info("Process unpaused %s (ret:%d)\n", ret ? 
"Failed" : "Ok", ret); + clear_dumped_fds(); return ret; } +CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__DUMP_DEVICE_LATE, amdgpu_unpause_processes) + +static void dmabuf_socket_name_gen(struct sockaddr_un *addr, int *len, int pid) +{ + addr->sun_family = AF_UNIX; + snprintf(addr->sun_path, UNIX_PATH_MAX, "x/crtools-amdgpu-dmabuf-%d-%" PRIx64, pid, criu_run_id); + *len = SUN_LEN(addr); + *addr->sun_path = '\0'; +} + +int amdgpu_make_socket(int pid) +{ + int ret = 0; + struct amdgpu_process *p; + struct sockaddr_un saddr; + int sock, slen; + + list_for_each_entry(p, &amdgpu_processes, l) { + if (p->pid == pid) { + dmabuf_socket_fd = get_unused_high_fd(); + current_pid = pid; + + sock = socket(PF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0); + if (sock < 0) { + pr_perror("Can't create socket"); + ret = -1; + goto out; + } + + dmabuf_socket_name_gen(&saddr, &slen, pid); + if (bind(sock, (struct sockaddr *)&saddr, slen) < 0) { + pr_perror("Can't bind dmabuf socket %s", saddr.sun_path + 1); + close(sock); + ret = -1; + goto out; + } + + ret = fcntl(sock, F_DUPFD, dmabuf_socket_fd); + if (ret < 0) { + close(sock); + goto out; + } else if (ret != dmabuf_socket_fd) { + close(dmabuf_socket_fd); + close(sock); + ret = -1; + goto out; + } + close(sock); + ret = 0; + } + } + + out: + + return ret; +} +CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__RESUME_DEVICES_EARLY, amdgpu_make_socket) + +int serve_out_dmabuf_fd(int handle, int fd) +{ + int ret = 0; + struct amdgpu_process *p; + struct sockaddr_un saddr; + int len; + + list_for_each_entry(p, &amdgpu_processes, l) { + dmabuf_socket_name_gen(&saddr, &len, p->pid); + + ret = send_fds(dmabuf_socket_fd, &saddr, len, &fd, 1, (void *)&handle, sizeof(handle)); + if (ret < 0) + goto out; + } + +out: + return ret; +} + +int amdgpu_collect_file(int pid, int fd) +{ + struct amdgpu_process *p; + + list_for_each_entry(p, &amdgpu_processes, l) + if (p->pid == pid) + return 0; + + p = malloc(sizeof(struct amdgpu_process)); + + if (!p) + return -ENOMEM; + + p->pid = pid; + + list_add(&p->l, &amdgpu_processes); + + return 0; +} +CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__COLLECT_FILE, amdgpu_collect_file) + +static int recv_dmabuf_fds(void) +{ + int fd, newfd, ret, handle; + + while (true) { + ret = __recv_fds(dmabuf_socket_fd, &fd, 1, (void *)&handle, sizeof(handle), MSG_DONTWAIT); + + if (ret == -EAGAIN || ret == -EWOULDBLOCK) + return 0; + else if (ret) + return -1; + + newfd = get_unused_high_fd(); + + reopen_fd_as(newfd, fd); + + record_shared_dmabuf_fd(handle, newfd); + } + + return 0; +} static int save_devices(int fd, struct kfd_ioctl_criu_args *args, struct kfd_criu_device_bucket *device_buckets, CriuKfd *e) @@ -1072,6 +1206,8 @@ static int save_bos(int id, int fd, struct kfd_ioctl_criu_args *args, struct kfd { struct thread_data *thread_datas; int ret = 0, i; + amdgpu_device_handle h_dev; + uint32_t major, minor; pr_debug("Dumping %d BOs\n", args->num_bos); @@ -1095,6 +1231,19 @@ static int save_bos(int id, int fd, struct kfd_ioctl_criu_args *args, struct kfd boinfo->size = bo_bucket->size; boinfo->offset = bo_bucket->offset; boinfo->alloc_flags = bo_bucket->alloc_flags; + + ret = amdgpu_device_initialize(node_get_drm_render_device(sys_get_node_by_gpu_id(&src_topology, bo_bucket->gpu_id)), &major, &minor, &h_dev); + + boinfo->handle = get_gem_handle(h_dev, bo_bucket->dmabuf_fd); + + amdgpu_device_deinitialize(h_dev); + } + for (i = 0; i < e->num_of_bos; i++) { + KfdBoEntry *boinfo = e->bo_entries[i]; + + ret = record_shared_bo(boinfo->handle, false); + if (ret) + goto exit; } for 
(int i = 0; i < e->num_of_gpus; i++) { @@ -1215,10 +1364,17 @@ int amdgpu_plugin_dump_file(int fd, int id) return -1; } - /* Initialize number of device files that will be checkpointed */ - init_gpu_count(&src_topology); + /* Check whether this plugin was called for kfd, dmabuf or render nodes */ + ret = get_dmabuf_info(fd, &st); + if (ret < 0) { + pr_perror("Failed to get dmabuf info"); + return -1; + } else if (ret == 0) { + pr_info("Dumping dmabuf fd = %d\n", fd); + ret = amdgpu_plugin_dmabuf_dump(fd, id, &st); + return ret; + } - /* Check whether this plugin was called for kfd or render nodes */ if (major(st.st_rdev) != major(st_kfd.st_rdev) || minor(st.st_rdev) != 0) { /* This is RenderD dumper plugin, for now just save renderD @@ -1229,12 +1385,7 @@ int amdgpu_plugin_dump_file(int fd, int id) if (ret) return ret; - /* Invoke unpause process if needed */ - decrement_checkpoint_count(); - if (checkpoint_is_complete()) { - ret = unpause_process(kfd_checkpoint_fd); - } - + ret = record_dumped_fd(fd, true); /* Need to return success here so that criu can call plugins for renderD nodes */ return ret; } @@ -1331,14 +1482,11 @@ int amdgpu_plugin_dump_file(int fd, int id) xfree(buf); -exit: - /* Restore all queues if conditions permit */ - kfd_checkpoint_fd = fd; - decrement_checkpoint_count(); - if (checkpoint_is_complete()) { - ret = unpause_process(fd); - } + ret = record_dumped_fd(fd, false); + if (ret) + goto exit; +exit: xfree((void *)args.devices); xfree((void *)args.bos); xfree((void *)args.priv_data); @@ -1361,7 +1509,6 @@ static int restore_devices(struct kfd_ioctl_criu_args *args, CriuKfd *e) int ret = 0, bucket_index = 0; pr_debug("Restoring %d devices\n", e->num_of_gpus); - args->num_devices = e->num_of_gpus; device_buckets = xzalloc(sizeof(*device_buckets) * args->num_devices); if (!device_buckets) @@ -1431,9 +1578,33 @@ static int restore_bos(struct kfd_ioctl_criu_args *args, CriuKfd *e) plugin_log_msg("BO [%d] gpu_id:%x addr:%llx size:%llx offset:%llx\n", i, bo_bucket->gpu_id, bo_bucket->addr, bo_bucket->size, bo_bucket->offset); + } pr_info("Restore BOs Ok\n"); + + return 0; +} + +int save_vma_updates(uint64_t offset, uint64_t addr, uint64_t restored_offset, int fd) +{ + struct vma_metadata *vma_md; + + vma_md = xmalloc(sizeof(*vma_md)); + if (!vma_md) { + return -ENOMEM; + } + + memset(vma_md, 0, sizeof(*vma_md)); + + vma_md->old_pgoff = offset; + vma_md->vma_entry = addr; + + vma_md->new_pgoff = restored_offset; + vma_md->fd = fd; + + list_add_tail(&vma_md->list, &update_vma_info_list); + return 0; } @@ -1551,7 +1722,7 @@ static int restore_bo_data(int id, struct kfd_criu_bo_bucket *bo_buckets, CriuKf return ret; } -int amdgpu_plugin_restore_file(int id) +int amdgpu_plugin_restore_file(int id, bool *retry_needed) { int ret = 0, fd; char img_path[PATH_MAX]; @@ -1562,9 +1733,15 @@ int amdgpu_plugin_restore_file(int id) size_t img_size; FILE *img_fp = NULL; + *retry_needed = false; + if (plugin_disabled) return -ENOTSUP; + ret = recv_dmabuf_fds(); + if (ret) + return ret; + pr_info("Initialized kfd plugin restorer with ID = %d\n", id); snprintf(img_path, sizeof(img_path), IMG_KFD_FILE, id); @@ -1580,12 +1757,17 @@ int amdgpu_plugin_restore_file(int id) * first as we assume restore_maps is already filled. Need to fix this later. 
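The dmabuf branch added to the dump path above hinges on classifying an fd by its /proc link. A standalone sketch of the same check, with CRIU's read_fd_link() replaced by a plain readlink(); the "/dmabuf" prefix mirrors the DMABUF_LINK define added later in this patch:

#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Return 0 if fd refers to a dmabuf, 1 if not, -1 on error. */
static int fd_is_dmabuf(int fd)
{
	char link[64], path[PATH_MAX];
	ssize_t len;

	snprintf(link, sizeof(link), "/proc/self/fd/%d", fd);
	len = readlink(link, path, sizeof(path) - 1);
	if (len < 0)
		return -1;
	path[len] = '\0';

	/* dma-buf fds resolve to "/dmabuf:<ino>" on recent kernels. */
	return strncmp(path, "/dmabuf", strlen("/dmabuf")) == 0 ? 0 : 1;
}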
*/ snprintf(img_path, sizeof(img_path), IMG_DRM_FILE, id); - pr_info("Restoring RenderD %s\n", img_path); img_fp = open_img_file(img_path, false, &img_size); - if (!img_fp) - return -EINVAL; - + if (!img_fp) { + ret = amdgpu_plugin_dmabuf_restore(id); + if (ret == 1) { + *retry_needed = true; + return 0; + } + return ret; + } + pr_info("Restoring RenderD %s\n", img_path); pr_debug("RenderD Image file size:%ld\n", img_size); buf = xmalloc(img_size); if (!buf) { @@ -1626,8 +1808,18 @@ int amdgpu_plugin_restore_file(int id) pr_info("render node destination gpu_id = 0x%04x\n", tp_node->gpu_id); fd = node_get_drm_render_device(tp_node); - if (fd < 0) + if (fd < 0) { pr_err("Failed to open render device (minor:%d)\n", tp_node->drm_render_minor); + return -1; + } + + ret = amdgpu_plugin_drm_restore_file(fd, rd); + if (ret == 1) + *retry_needed = true; + if (ret < 0) { + fd = ret; + goto fail; + } fail: criu_render_node__free_unpacked(rd, NULL); xfree(buf); @@ -1639,12 +1831,20 @@ int amdgpu_plugin_restore_file(int id) * copy of the fd. CRIU core owns the duplicated returned fd, and amdgpu_plugin owns the fd stored in * tp_node. */ - fd = dup(fd); - if (fd == -1) { - pr_perror("unable to duplicate the render fd"); - return -1; + + if (fd < 0) + return fd; + + if (!(*retry_needed)) { + fd = dup(fd); + if (fd == -1) { + pr_perror("unable to duplicate the render fd"); + return -1; + } + return fd; } - return fd; + + return 0; } fd = open(AMDGPU_KFD_DEVICE, O_RDWR | O_CLOEXEC); @@ -1688,13 +1888,16 @@ int amdgpu_plugin_restore_file(int id) * This way, we know that the file descriptors we store will not conflict with file descriptors inside core * CRIU. */ - fd_next = find_unused_fd_pid(e->pid); - if (fd_next <= 0) { - pr_err("Failed to find unused fd (fd:%d)\n", fd_next); - ret = -EINVAL; - goto exit; + if (fd_next == -1) { + fd_next = find_unused_fd_pid(e->pid); + if (fd_next <= 0) { + pr_err("Failed to find unused fd (fd:%d)\n", fd_next); + ret = -EINVAL; + goto exit; + } } + ret = devinfo_to_topology(e->device_entries, e->num_of_gpus + e->num_of_cpus, &src_topology); if (ret) { pr_err("Failed to convert stored device information to topology\n"); @@ -1725,7 +1928,6 @@ int amdgpu_plugin_restore_file(int id) args.num_objects = e->num_of_objects; args.priv_data_size = e->priv_data.len; args.priv_data = (uintptr_t)e->priv_data.data; - args.op = KFD_CRIU_OP_RESTORE; if (kmtIoctl(fd, AMDKFD_IOC_CRIU_OP, &args) == -1) { pr_perror("Restore ioctl failed"); @@ -1733,6 +1935,18 @@ int amdgpu_plugin_restore_file(int id) goto exit; } + if (ret < 0) + goto exit; + + for (int i = 0; i < args.num_bos; i++) { + struct kfd_criu_bo_bucket *bo_bucket = &((struct kfd_criu_bo_bucket *)args.bos)[i]; + KfdBoEntry *bo_entry = e->bo_entries[i]; + + if (bo_entry->handle != -1) { + serve_out_dmabuf_fd(bo_entry->handle, bo_bucket->dmabuf_fd); + } + } + ret = restore_bo_data(id, (struct kfd_criu_bo_bucket *)args.bos, e); if (ret) goto exit; @@ -1857,6 +2071,14 @@ int amdgpu_plugin_resume_devices_late(int target_pid) } } + clear_restore_state(); + close(dmabuf_socket_fd); + while (!list_empty(&amdgpu_processes)) { + struct amdgpu_process *st = list_first_entry(&amdgpu_processes, struct amdgpu_process, l); + list_del(&st->l); + free(st); + } + close(fd); return exit_code; } diff --git a/plugins/amdgpu/amdgpu_plugin_dmabuf.c b/plugins/amdgpu/amdgpu_plugin_dmabuf.c new file mode 100644 index 0000000000..67294b2cad --- /dev/null +++ b/plugins/amdgpu/amdgpu_plugin_dmabuf.c @@ -0,0 +1,147 @@ +#include +#include +#include +#include 
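The serve_out_dmabuf_fd()/recv_dmabuf_fds() pair added earlier in this patch relies on CRIU's send_fds()/__recv_fds() helpers, which wrap the standard SCM_RIGHTS control-message mechanism. A minimal self-contained sender for one fd plus an integer tag; the real helper additionally fills msg_name with the abstract datagram-socket address generated by dmabuf_socket_name_gen():

#include <string.h>
#include <sys/socket.h>

static int send_fd_with_tag(int sock, int fd, int tag)
{
	union {
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;	/* forces correct alignment */
	} u;
	struct iovec iov = { .iov_base = &tag, .iov_len = sizeof(tag) };
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = u.buf,
		.msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

	return sendmsg(sock, &msg, 0) < 0 ? -1 : 0;
}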
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include "common/list.h"
+#include "criu-amdgpu.pb-c.h"
+
+#include "xmalloc.h"
+#include "criu-log.h"
+#include "amdgpu_plugin_drm.h"
+#include "amdgpu_plugin_util.h"
+#include "amdgpu_plugin_dmabuf.h"
+
+#include "util.h"
+#include "common/scm.h"
+
+/* Returns < 0 on error, > 0 if fd is not a dmabuf, 0 if it is */
+int get_dmabuf_info(int fd, struct stat *st)
+{
+	char path[PATH_MAX];
+
+	if (read_fd_link(fd, path, sizeof(path)) < 0)
+		return -1;
+
+	if (strncmp(path, DMABUF_LINK, strlen(DMABUF_LINK)) != 0)
+		return 1;
+
+	return 0;
+}
+
+int amdgpu_plugin_dmabuf_dump(int dmabuf_fd, int id, struct stat *dmabuf_stat)
+{
+	CriuDmabufNode *node;
+	int ret = 0;
+	char path[PATH_MAX];
+	size_t len = 0;
+	unsigned char *buf = NULL;
+	int gem_handle;
+
+	pr_info("Dumping dmabuf fd = %d\n", dmabuf_fd);
+
+	gem_handle = handle_for_shared_bo_fd(dmabuf_fd);
+	if (gem_handle < 0) {
+		pr_err("Failed to get handle for dmabuf_fd = %d\n", dmabuf_fd);
+		return -EAGAIN; /* Retry needed */
+	}
+
+	node = xmalloc(sizeof(*node));
+	if (!node) {
+		pr_err("Failed to allocate memory for dmabuf node\n");
+		return -ENOMEM;
+	}
+	criu_dmabuf_node__init(node);
+
+	node->gem_handle = gem_handle;
+
+	/* Serialize metadata to a file */
+	snprintf(path, sizeof(path), IMG_DMABUF_FILE, id);
+	len = criu_dmabuf_node__get_packed_size(node);
+	buf = xmalloc(len);
+	if (!buf) {
+		pr_err("Failed to allocate buffer for dmabuf metadata\n");
+		xfree(node);
+		return -ENOMEM;
+	}
+	criu_dmabuf_node__pack(node, buf);
+	ret = write_img_file(path, buf, len);
+
+	xfree(buf);
+	xfree(node);
+	return ret;
+}
+
+int amdgpu_plugin_dmabuf_restore(int id)
+{
+	char path[PATH_MAX];
+	size_t img_size;
+	FILE *img_fp = NULL;
+	int ret = 0;
+	int dmabuf_fd;
+	CriuDmabufNode *rd = NULL;
+	unsigned char *buf = NULL;
+
+	snprintf(path, sizeof(path), IMG_DMABUF_FILE, id);
+
+	/* Read serialized metadata */
+	img_fp = open_img_file(path, false, &img_size);
+	if (!img_fp) {
+		pr_err("Failed to open dmabuf metadata file: %s\n", path);
+		return -EINVAL;
+	}
+
+	pr_debug("dmabuf Image file size:%zu\n", img_size);
+	buf = xmalloc(img_size);
+	if (!buf) {
+		pr_perror("Failed to allocate memory");
+		fclose(img_fp);
+		return -ENOMEM;
+	}
+
+	ret = read_fp(img_fp, buf, img_size);
+	if (ret) {
+		pr_perror("Unable to read from %s", path);
+		fclose(img_fp);
+		xfree(buf);
+		return ret;
+	}
+
+	rd = criu_dmabuf_node__unpack(NULL, img_size, buf);
+	if (rd == NULL) {
+		pr_perror("Unable to parse the dmabuf message %d", id);
+		fclose(img_fp);
+		xfree(buf);
+		return -1;
+	}
+	fclose(img_fp);
+
+	pr_info("dmabuf node gem_handle = %u\n", rd->gem_handle);
+
+	/* Match GEM handle with shared_dmabuf list */
+	dmabuf_fd = dmabuf_fd_for_handle(rd->gem_handle);
+	if (dmabuf_fd == -1) {
+		pr_err("Failed to find dmabuf_fd for GEM handle = %u\n", rd->gem_handle);
+		criu_dmabuf_node__free_unpacked(rd, NULL);
+		xfree(buf);
+		return 1; /* Retry needed */
+	}
+
+	pr_info("Restored dmabuf_fd = %d for GEM handle = %u\n", dmabuf_fd, rd->gem_handle);
+	ret = dmabuf_fd;
+
+	criu_dmabuf_node__free_unpacked(rd, NULL);
+	xfree(buf);
+	return ret;
+}
diff --git a/plugins/amdgpu/amdgpu_plugin_dmabuf.h b/plugins/amdgpu/amdgpu_plugin_dmabuf.h
new file mode 100644
index 0000000000..3af708b16b
--- /dev/null
+++ b/plugins/amdgpu/amdgpu_plugin_dmabuf.h
@@ -0,0 +1,13 @@
+
+#ifndef __AMDGPU_PLUGIN_DMABUF_H__
+#define __AMDGPU_PLUGIN_DMABUF_H__
+
+#include "amdgpu_plugin_util.h"
+#include "criu-amdgpu.pb-c.h"
+
+int amdgpu_plugin_dmabuf_dump(int fd, int id, struct stat *dmabuf_stat);
+int amdgpu_plugin_dmabuf_restore(int id);
+
+int get_dmabuf_info(int fd, struct stat *st);
+
+#endif /* __AMDGPU_PLUGIN_DMABUF_H__ */
\ No newline at end of file
diff --git a/plugins/amdgpu/amdgpu_plugin_drm.c b/plugins/amdgpu/amdgpu_plugin_drm.c
index d54cd937d5..124fa074a9 100644
--- a/plugins/amdgpu/amdgpu_plugin_drm.c
+++ b/plugins/amdgpu/amdgpu_plugin_drm.c
@@ -19,6 +19,7 @@
 #include
 #include "common/list.h"
+#include "files.h"
 #include "criu-amdgpu.pb-c.h"
@@ -27,12 +28,79 @@
 #include
 #include "xmalloc.h"
 #include "criu-log.h"
-#include "kfd_ioctl.h"
+#include "amdgpu_drm.h"
 #include "amdgpu_plugin_drm.h"
 #include "amdgpu_plugin_util.h"
 #include "amdgpu_plugin_topology.h"
+#include "util.h"
+#include "common/scm.h"
+
+int get_gem_handle(amdgpu_device_handle h_dev, int dmabuf_fd)
+{
+	uint32_t handle;
+	int fd = amdgpu_device_get_fd(h_dev);
+
+	if (dmabuf_fd == -1)
+		return -1;
+
+	if (drmPrimeFDToHandle(fd, dmabuf_fd, &handle))
+		return -1;
+
+	return handle;
+}
+
+int drmIoctl(int fd, unsigned long request, void *arg)
+{
+	int ret, max_retries = 200;
+
+	do {
+		ret = ioctl(fd, request, arg);
+	} while (ret == -1 && max_retries-- > 0 && (errno == EINTR || errno == EAGAIN));
+
+	if (ret == -1 && errno == EBADF)
+		/* The DRM render fd is not valid in this process, e.g.
+		 * because it was opened before a fork; report it loudly.
+		 */
+		pr_perror("DRM file descriptor not valid in this process");
+	return ret;
+}
+
+static int allocate_bo_entries(CriuRenderNode *e, int num_bos)
+{
+	e->bo_entries = xmalloc(sizeof(DrmBoEntry *) * num_bos);
+	if (!e->bo_entries) {
+		pr_err("Failed to allocate bo_info\n");
+		return -ENOMEM;
+	}
+
+	for (int i = 0; i < num_bos; i++) {
+		DrmBoEntry *entry = xzalloc(sizeof(*entry));
+
+		if (!entry) {
+			pr_err("Failed to allocate bo entry\n");
+			return -ENOMEM;
+		}
+
+		drm_bo_entry__init(entry);
+
+		e->bo_entries[i] = entry;
+		e->n_bo_entries++;
+	}
+	return 0;
+}
+
+static void free_e(CriuRenderNode *e)
+{
+	for (int i = 0; i < e->n_bo_entries; i++) {
+		if (e->bo_entries[i])
+			xfree(e->bo_entries[i]);
+	}
+
+	xfree(e->bo_entries);
+	xfree(e);
+}
+
 int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *st)
 {
 	char path[PATH_MAX];
@@ -60,19 +128,196 @@ int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *st)
 	return 0;
 }
+static int restore_bo_contents_drm(int drm_render_minor, pid_t pid, int drm_fd, uint64_t num_of_bos, struct drm_amdgpu_criu_bo_bucket *bo_buckets)
+{
+	size_t image_size = 0, total_bo_size = 0, max_bo_size = 0, buffer_size;
+	struct amdgpu_gpu_info gpu_info = { 0 };
+	amdgpu_device_handle h_dev;
+	uint64_t max_copy_size;
+	uint32_t major, minor;
+	FILE *bo_contents_fp = NULL;
+	void *buffer = NULL;
+	char img_path[40];
+	int num_bos = 0;
+	int i, ret = 0;
+
+	ret = amdgpu_device_initialize(drm_fd, &major, &minor, &h_dev);
+	if (ret) {
+		pr_perror("failed to initialize device");
+		goto exit;
+	}
+	plugin_log_msg("libdrm initialized successfully\n");
+
+	ret = amdgpu_query_gpu_info(h_dev, &gpu_info);
+	if (ret) {
+		pr_perror("failed to query gpuinfo via libdrm");
+		goto exit;
+	}
+
+	max_copy_size = (gpu_info.family_id >= AMDGPU_FAMILY_AI) ?
SDMA_LINEAR_COPY_MAX_SIZE : + SDMA_LINEAR_COPY_MAX_SIZE - 1; + + for (i = 0; i < num_of_bos; i++) { + if (bo_buckets[i].preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) { + total_bo_size += bo_buckets[i].size; + + if (bo_buckets[i].size > max_bo_size) + max_bo_size = bo_buckets[i].size; + } + } + + buffer_size = max_bo_size; + + posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), buffer_size); + if (!buffer) { + pr_perror("Failed to alloc aligned memory. Consider setting KFD_MAX_BUFFER_SIZE."); + ret = -ENOMEM; + goto exit; + } + + for (i = 0; i < num_of_bos; i++) { + + if (!(bo_buckets[i].preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT))) + continue; + + if (bo_buckets[i].addr == -1) + continue; + + num_bos++; + + snprintf(img_path, sizeof(img_path), IMG_DRM_PAGES_FILE, pid, drm_render_minor, i); + bo_contents_fp = open_img_file(img_path, false, &image_size); + + ret = sdma_copy_bo(bo_buckets[i].dmabuf_fd, bo_buckets[i].size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, + SDMA_OP_VRAM_WRITE, true); + if (ret) { + pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i); + break; + } + plugin_log_msg("** Successfully filled the BO using sDMA: bo_buckets[%d] **\n", i); + + if (bo_contents_fp) + fclose(bo_contents_fp); + } + +exit: + for (int i = 0; i < num_of_bos; i++) { + if (bo_buckets[i].dmabuf_fd != KFD_INVALID_FD) + close(bo_buckets[i].dmabuf_fd); + } + + xfree(buffer); + + amdgpu_device_deinitialize(h_dev); + return ret; +} int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm) { - CriuRenderNode rd = CRIU_RENDER_NODE__INIT; - struct tp_node *tp_node; + CriuRenderNode *rd = NULL; char path[PATH_MAX]; unsigned char *buf; int minor; int len; int ret; + struct drm_amdgpu_criu_args args = {0}; + size_t image_size; + struct tp_node *tp_node; + + rd = xmalloc(sizeof(*rd)); + if (!rd) { + ret = -ENOMEM; + goto exit; + } + criu_render_node__init(rd); /* Get the topology node of the DRM device */ minor = minor(drm->st_rdev); + rd->drm_render_minor = minor; + + args.op = AMDGPU_CRIU_OP_PROCESS_INFO; + if (drmIoctl(fd, DRM_IOCTL_AMDGPU_CRIU_OP, &args) == -1) { + pr_perror("Failed to call process info ioctl"); + ret = -1; + goto exit; + } + + rd->pid = args.pid; + rd->num_of_bos = args.num_bos; + rd->num_of_objects = args.num_objs; + ret = allocate_bo_entries(rd, args.num_bos); + if (ret) + goto exit; + + args.bos = (uintptr_t)xzalloc((args.num_bos * sizeof(struct drm_amdgpu_criu_bo_bucket))); + if (!args.bos) { + ret = -ENOMEM; + goto exit; + } + + args.priv_data = (uintptr_t)xzalloc((args.priv_data_size)); + if (!args.priv_data) { + ret = -ENOMEM; + goto exit; + } + + args.op = AMDGPU_CRIU_OP_CHECKPOINT; + ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_CRIU_OP, &args); + if (ret) { + pr_perror("Failed to call dumper (process) ioctl"); + goto exit; + } + + rd->priv_data.data = (void *)args.priv_data; + rd->priv_data.len = args.priv_data_size; + + for (int i = 0; i < args.num_bos; i++) { + struct drm_amdgpu_criu_bo_bucket bo_bucket = ((struct drm_amdgpu_criu_bo_bucket *)args.bos)[i]; + uint32_t major, minor; + amdgpu_device_handle h_dev; + void *buffer = NULL; + char img_path[40]; + FILE *bo_contents_fp = NULL; + DrmBoEntry *boinfo = rd->bo_entries[i]; + + boinfo->addr = bo_bucket.addr; + boinfo->size = bo_bucket.size; + boinfo->offset = bo_bucket.offset; + boinfo->alloc_flags = bo_bucket.alloc_flags; + boinfo->preferred_domains = bo_bucket.preferred_domains; + + ret = amdgpu_device_initialize(fd, &major, &minor, &h_dev); + + 
snprintf(img_path, sizeof(img_path), IMG_DRM_PAGES_FILE, rd->pid, rd->drm_render_minor, i); //TODO: needs to be unique by process and by device, and recoverable by restore + bo_contents_fp = open_img_file(img_path, true, &image_size); + + posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), bo_bucket.size); + + ret = sdma_copy_bo(bo_bucket.dmabuf_fd, bo_bucket.size, bo_contents_fp, buffer, bo_bucket.size, h_dev, 0x1000, + SDMA_OP_VRAM_READ, false); + + boinfo->handle = get_gem_handle(h_dev, bo_bucket.dmabuf_fd); + boinfo->is_import = (bo_bucket.flags & AMDGPU_CRIU_BO_FLAG_IS_IMPORT) + || shared_bo_has_exporter(boinfo->handle); + + if (bo_bucket.dmabuf_fd != KFD_INVALID_FD) + close(bo_bucket.dmabuf_fd); + + if (bo_contents_fp) + fclose(bo_contents_fp); + + ret = amdgpu_device_deinitialize(h_dev); + if (ret) + goto exit; + } + for (int i = 0; i < args.num_bos; i++) { + DrmBoEntry *boinfo = rd->bo_entries[i]; + + ret = record_shared_bo(boinfo->handle, boinfo->is_import); + if (ret) + goto exit; + } + tp_node = sys_get_node_by_render_minor(&src_topology, minor); if (!tp_node) { pr_err("Failed to find a device with minor number = %d\n", minor); @@ -80,21 +325,130 @@ int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm) } /* Get the GPU_ID of the DRM device */ - rd.gpu_id = maps_get_dest_gpu(&checkpoint_maps, tp_node->gpu_id); - if (!rd.gpu_id) { - pr_err("Failed to find valid gpu_id for the device = %d\n", rd.gpu_id); + rd->gpu_id = maps_get_dest_gpu(&checkpoint_maps, tp_node->gpu_id); + if (!rd->gpu_id) { + pr_err("Failed to find valid gpu_id for the device = %d\n", rd->gpu_id); return -ENODEV; } - len = criu_render_node__get_packed_size(&rd); + len = criu_render_node__get_packed_size(rd); buf = xmalloc(len); if (!buf) return -ENOMEM; - criu_render_node__pack(&rd, buf); + criu_render_node__pack(rd, buf); snprintf(path, sizeof(path), IMG_DRM_FILE, id); ret = write_img_file(path, buf, len); + + exit: + xfree((void *)args.bos); + xfree((void *)args.priv_data); xfree(buf); + free_e(rd); return ret; } + +int amdgpu_plugin_drm_unpause_file(int fd) +{ + struct drm_amdgpu_criu_args args = {0}; + int ret = 0; + + args.op = AMDGPU_CRIU_OP_UNPAUSE; + if (drmIoctl(fd, DRM_IOCTL_AMDGPU_CRIU_OP, &args) == -1) { + pr_perror("Failed to call unpause ioctl"); + ret = -1; + goto exit; + } + + exit: + return ret; +} + +int amdgpu_plugin_drm_restore_file(int fd, CriuRenderNode *rd) +{ + struct drm_amdgpu_criu_args args = {0}; + int ret = 0; + bool retry_needed = false; + + args.num_bos = rd->num_of_bos; + args.num_objs = rd->num_of_objects; + args.priv_data = (uint64_t)rd->priv_data.data; + args.priv_data_size = rd->priv_data.len; + args.bos = (uint64_t)xzalloc(sizeof(struct drm_amdgpu_criu_bo_bucket) * rd->num_of_bos); + + for (int i = 0; i < args.num_bos; i++) { + struct drm_amdgpu_criu_bo_bucket *bo_bucket = &((struct drm_amdgpu_criu_bo_bucket *)args.bos)[i]; + DrmBoEntry *boinfo = rd->bo_entries[i]; + int dmabuf_fd = -1; + + bo_bucket->addr = boinfo->addr; + + if (work_already_completed(boinfo->handle, rd->drm_render_minor)) { + bo_bucket->flags |= AMDGPU_CRIU_BO_FLAG_SKIP; + } else if (boinfo->handle != -1) { + if (boinfo->is_import) { + dmabuf_fd = dmabuf_fd_for_handle(boinfo->handle); + if (dmabuf_fd == -1) { + bo_bucket->flags |= AMDGPU_CRIU_BO_FLAG_SKIP; + retry_needed = true; + } + } + } + + if (boinfo->is_import) + bo_bucket->flags |= AMDGPU_CRIU_BO_FLAG_IS_IMPORT; + + bo_bucket->dmabuf_fd = dmabuf_fd; + bo_bucket->size = boinfo->size; + bo_bucket->offset = boinfo->offset; + 
bo_bucket->alloc_flags = boinfo->alloc_flags; + bo_bucket->preferred_domains = boinfo->preferred_domains; + } + + args.op = AMDGPU_CRIU_OP_RESTORE; + if (drmIoctl(fd, DRM_IOCTL_AMDGPU_CRIU_OP, &args) == -1) { + pr_perror("Failed to call restore ioctl"); + ret = -1; + goto exit; + } + + for (int i = 0; i < args.num_bos; i++) { + struct drm_amdgpu_criu_bo_bucket *bo_bucket = &((struct drm_amdgpu_criu_bo_bucket *)args.bos)[i]; + DrmBoEntry *boinfo = rd->bo_entries[i]; + + if (!(bo_bucket->flags & AMDGPU_CRIU_BO_FLAG_SKIP) && !work_already_completed(boinfo->handle, rd->drm_render_minor)) { + ret = record_completed_work(boinfo->handle, rd->drm_render_minor); + if (ret) + goto exit; + if (!boinfo->is_import) + serve_out_dmabuf_fd(boinfo->handle, bo_bucket->dmabuf_fd); + } + } + ret = record_completed_work(-1, rd->drm_render_minor); + if (ret) + goto exit; + + if (args.num_bos > 0) { + + for (int i = 0; i < args.num_bos; i++) { + struct drm_amdgpu_criu_bo_bucket *bo_bucket = &((struct drm_amdgpu_criu_bo_bucket *)args.bos)[i]; + + if (!(bo_bucket->flags & AMDGPU_CRIU_BO_FLAG_SKIP)) + ret = save_vma_updates(bo_bucket->offset, bo_bucket->addr, bo_bucket->restored_offset, fd); + if (ret < 0) + goto exit; + } + + ret = restore_bo_contents_drm(rd->drm_render_minor, rd->pid, fd, args.num_bos, (struct drm_amdgpu_criu_bo_bucket *)args.bos); + if (ret) + goto exit; + } + + + exit: + if (ret < 0) + return ret; + + return retry_needed; +} diff --git a/plugins/amdgpu/amdgpu_plugin_drm.h b/plugins/amdgpu/amdgpu_plugin_drm.h index 6f0c1a9a63..3dd4499a6c 100644 --- a/plugins/amdgpu/amdgpu_plugin_drm.h +++ b/plugins/amdgpu/amdgpu_plugin_drm.h @@ -24,5 +24,13 @@ int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *drm); */ int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm); +int amdgpu_plugin_drm_restore_file(int fd, CriuRenderNode *rd); + +int amdgpu_plugin_drm_unpause_file(int fd); + +int get_gem_handle(amdgpu_device_handle h_dev, int dmabuf_fd); + +int save_vma_updates(uint64_t offset, uint64_t addr, uint64_t restored_offset, int gpu_id); + #endif /* __AMDGPU_PLUGIN_DRM_H__ */ diff --git a/plugins/amdgpu/amdgpu_plugin_util.c b/plugins/amdgpu/amdgpu_plugin_util.c index a165fc9cd5..06fd24e208 100644 --- a/plugins/amdgpu/amdgpu_plugin_util.c +++ b/plugins/amdgpu/amdgpu_plugin_util.c @@ -37,9 +37,12 @@ #include "amdgpu_drm.h" #include "amdgpu_plugin_util.h" #include "amdgpu_plugin_topology.h" +#include "amdgpu_plugin_drm.h" -/* Tracks number of device files that need to be checkpointed */ -static int dev_file_cnt = 0; +static LIST_HEAD(dumped_fds); +static LIST_HEAD(shared_bos); +static LIST_HEAD(shared_dmabuf_fds); +static LIST_HEAD(completed_work); /* Helper structures to encode device topology of SRC and DEST platforms */ struct tp_system src_topology; @@ -49,37 +52,183 @@ struct tp_system dest_topology; struct device_maps checkpoint_maps; struct device_maps restore_maps; -bool checkpoint_is_complete() -{ - return (dev_file_cnt == 0); +int record_dumped_fd(int fd, bool is_drm) { + int newfd = dup(fd); + + if (newfd < 0) + return newfd; + struct dumped_fd *st = malloc(sizeof(struct dumped_fd)); + if (!st) + return -1; + st->fd = newfd; + st->is_drm = is_drm; + list_add(&st->l, &dumped_fds); + + return 0; } -void decrement_checkpoint_count() -{ - dev_file_cnt--; +struct list_head *get_dumped_fds() { + return &dumped_fds; } -void init_gpu_count(struct tp_system *topo) -{ - if (dev_file_cnt != 0) - return; +bool shared_bo_has_exporter(int handle) { + struct shared_bo *bo; + + if (handle 
== -1) { + return false; + } - /* We add ONE to include checkpointing of KFD device */ - dev_file_cnt = 1 + topology_gpu_count(topo); + list_for_each_entry(bo, &shared_bos, l) { + if (bo->handle == handle) { + return bo->has_exporter; + } + } + + return false; } -int read_fp(FILE *fp, void *buf, const size_t buf_len) -{ - size_t len_read; +int record_shared_bo(int handle, bool is_imported) { + struct shared_bo *bo; - len_read = fread(buf, 1, buf_len, fp); - if (len_read != buf_len) { - pr_err("Unable to read file (read:%ld buf_len:%ld)\n", len_read, buf_len); - return -EIO; + if (handle == -1) + return 0; + + list_for_each_entry(bo, &shared_bos, l) { + if (bo->handle == handle) { + return 0; + } + } + bo = malloc(sizeof(struct shared_bo)); + if (!bo) + return -1; + bo->handle = handle; + bo->has_exporter = !is_imported; + list_add(&bo->l, &shared_bos); + + return 0; +} + +int handle_for_shared_bo_fd(int fd) { + struct dumped_fd *df; + uint32_t trial_handle; + uint32_t df_handle; + amdgpu_device_handle h_dev; + uint32_t major, minor; + + list_for_each_entry(df, &dumped_fds, l) { + + /* see if the gem handle for fd using the hdev for df->fd is the + same as bo->handle. */ + + if (!df->is_drm) { + continue; + } + + if (amdgpu_device_initialize(df->fd, &major, &minor, &h_dev)) { + pr_err("Failed to initialize amdgpu device\n"); + continue; + } + + trial_handle = get_gem_handle(h_dev, fd); + df_handle = get_gem_handle(h_dev, df->fd); + + amdgpu_device_deinitialize(h_dev); + + if (df_handle == trial_handle) { + return trial_handle; + } + } + + return -1; +} + +int record_shared_dmabuf_fd(int handle, int dmabuf_fd) { + struct shared_dmabuf *bo; + + bo = malloc(sizeof(struct shared_dmabuf)); + if(!bo) + return -1; + bo->handle = handle; + bo->dmabuf_fd = dmabuf_fd; + list_add(&bo->l, &shared_dmabuf_fds); + + return 0; +} + +int dmabuf_fd_for_handle(int handle) { + struct shared_dmabuf *bo; + + list_for_each_entry(bo, &shared_dmabuf_fds, l) { + if (bo->handle == handle) { + return bo->dmabuf_fd; + } } + + return -1; +} + +int record_completed_work(int handle, int id) { + struct restore_completed_work *work; + + work = malloc(sizeof(struct restore_completed_work)); + if (!work) + return -1; + work->handle = handle; + work->id = id; + list_add(&work->l, &completed_work); + return 0; } +bool work_already_completed(int handle, int id) { + struct restore_completed_work *work; + + list_for_each_entry(work, &completed_work, l) { + if (work->handle == handle && work->id == id) { + return true; + } + } + + return false; +} + +void clear_restore_state() { + while (!list_empty(&shared_dmabuf_fds)) { + struct shared_dmabuf *st = list_first_entry(&shared_dmabuf_fds, struct shared_dmabuf, l); + list_del(&st->l); + close(st->dmabuf_fd); + free(st); + } + + while (!list_empty(&completed_work)) { + struct restore_completed_work *st = list_first_entry(&completed_work, struct restore_completed_work, l); + list_del(&st->l); + free(st); + } +} + +void clear_dumped_fds() { + while (!list_empty(&dumped_fds)) { + struct dumped_fd *st = list_first_entry(&dumped_fds, struct dumped_fd, l); + list_del(&st->l); + close(st->fd); + free(st); + } +} + +int read_fp(FILE *fp, void *buf, const size_t buf_len) +{ + size_t len_read; + + len_read = fread(buf, 1, buf_len, fp); + if (len_read != buf_len) { + pr_err("Unable to read file (read:%ld buf_len:%ld)\n", len_read, buf_len); + return -EIO; + + } + return 0; +} + int write_fp(FILE *fp, const void *buf, const size_t buf_len) { size_t len_write; diff --git 
a/plugins/amdgpu/amdgpu_plugin_util.h b/plugins/amdgpu/amdgpu_plugin_util.h index aacca3a28c..19cd47de80 100644 --- a/plugins/amdgpu/amdgpu_plugin_util.h +++ b/plugins/amdgpu/amdgpu_plugin_util.h @@ -1,6 +1,8 @@ #ifndef __AMDGPU_PLUGIN_UTIL_H__ #define __AMDGPU_PLUGIN_UTIL_H__ +#include + #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 #endif @@ -51,14 +53,18 @@ /* Name of file having serialized data of DRM device */ #define IMG_DRM_FILE "amdgpu-renderD-%d.img" +/* Name of file having serialized data of dmabuf meta */ +#define IMG_DMABUF_FILE "amdgpu-dmabuf_%d.img" + /* Name of file having serialized data of DRM device buffer objects (BOs) */ -#define IMG_DRM_PAGES_FILE "amdgpu-drm-pages-%d-%04x.img" +#define IMG_DRM_PAGES_FILE "amdgpu-drm-pages-%d-%d-%04x.img" /* Helper macros to Checkpoint and Restore a ROCm file */ #define HSAKMT_SHM_PATH "/dev/shm/hsakmt_shared_mem" #define HSAKMT_SHM "/hsakmt_shared_mem" #define HSAKMT_SEM_PATH "/dev/shm/sem.hsakmt_semaphore" #define HSAKMT_SEM "hsakmt_semaphore" +#define DMABUF_LINK "/dmabuf" /* Help macros to build sDMA command packets */ #define SDMA_PACKET(op, sub_op, e) ((((e)&0xFFFF) << 16) | (((sub_op)&0xFF) << 8) | (((op)&0xFF) << 0)) @@ -73,6 +79,35 @@ enum sdma_op_type { SDMA_OP_VRAM_WRITE, }; +struct dumped_fd { + struct list_head l; + int fd; + bool is_drm; +}; + +struct shared_bo { + struct list_head l; + int handle; + bool has_exporter; +}; + +struct shared_dmabuf { + struct list_head l; + int handle; + int dmabuf_fd; +}; + +struct restore_completed_work { + struct list_head l; + int handle; + int id; +}; + +struct amdgpu_process { + struct list_head l; + int pid; +}; + /* Helper structures to encode device topology of SRC and DEST platforms */ extern struct tp_system src_topology; extern struct tp_system dest_topology; @@ -97,10 +132,28 @@ int read_file(const char *file_path, void *buf, const size_t buf_len); int write_img_file(char *path, const void *buf, const size_t buf_len); FILE *open_img_file(char *path, bool write, size_t *size); -bool checkpoint_is_complete(); -void decrement_checkpoint_count(); -void init_gpu_count(struct tp_system *topology); +int record_dumped_fd(int fd, bool is_drm); +struct list_head *get_dumped_fds(); +void clear_dumped_fds(); + +bool shared_bo_has_exporter(int handle); +int record_shared_bo(int handle, bool is_imported); +int handle_for_shared_bo_fd(int dmabuf_fd); + +int record_shared_dmabuf_fd(int handle, int dmabuf_fd); +int dmabuf_fd_for_handle(int handle); + +int record_completed_work(int handle, int id); +bool work_already_completed(int handle, int id); + +void clear_restore_state(); void print_kfd_bo_stat(int bo_cnt, struct kfd_criu_bo_bucket *bo_list); +int sdma_copy_bo(int shared_fd, uint64_t size, FILE *storage_fp, + void *buffer, size_t buffer_size, amdgpu_device_handle h_dev, + uint64_t max_copy_size, enum sdma_op_type type, bool do_not_free); + +int serve_out_dmabuf_fd(int handle, int fd); + #endif /* __AMDGPU_PLUGIN_UTIL_H__ */ diff --git a/plugins/amdgpu/criu-amdgpu.proto b/plugins/amdgpu/criu-amdgpu.proto index 078b676500..e0764ced72 100644 --- a/plugins/amdgpu/criu-amdgpu.proto +++ b/plugins/amdgpu/criu-amdgpu.proto @@ -46,6 +46,7 @@ message kfd_bo_entry { required uint64 offset = 3; required uint32 alloc_flags = 4; required uint32 gpu_id = 5; + required uint32 handle = 6; } message criu_kfd { @@ -61,6 +62,26 @@ message criu_kfd { required bytes priv_data = 10; } +message drm_bo_entry { + required uint64 addr = 1; + required uint64 size = 2; + required uint64 offset = 3; + required uint64 
alloc_flags = 4;
+	required uint32 preferred_domains = 5;
+	required uint32 handle = 6;
+	required uint32 is_import = 7;
+}
+
 message criu_render_node {
 	required uint32 gpu_id = 1;
+	required uint32 pid = 2;
+	required uint32 drm_render_minor = 3;
+	required uint64 num_of_bos = 4;
+	repeated drm_bo_entry bo_entries = 5;
+	required uint32 num_of_objects = 6;
+	required bytes priv_data = 7;
+}
+
+message criu_dmabuf_node {
+	required uint32 gem_handle = 1;
 }
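The new messages round-trip through the usual protobuf-c generated API, which the plugin code above uses as well. A minimal sketch that unpacks a criu_render_node blob and walks the new BO table; the function name and printed summary are illustrative:

#include <stdint.h>
#include <stdio.h>

#include "criu-amdgpu.pb-c.h"

static int print_render_node_summary(const uint8_t *buf, size_t len)
{
	CriuRenderNode *rd = criu_render_node__unpack(NULL, len, buf);

	if (!rd)
		return -1;

	printf("pid %u minor %u: %zu BOs, %u objects\n",
	       rd->pid, rd->drm_render_minor, rd->n_bo_entries, rd->num_of_objects);

	for (size_t i = 0; i < rd->n_bo_entries; i++)
		printf("  bo[%zu]: handle %u%s size %llu\n", i,
		       rd->bo_entries[i]->handle,
		       rd->bo_entries[i]->is_import ? " (import)" : "",
		       (unsigned long long)rd->bo_entries[i]->size);

	criu_render_node__free_unpacked(rd, NULL);
	return 0;
}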