]> xenbits.xensource.com Git - people/aperard/linux.git/commitdiff
drm/amdgpu: Init zone device and drm client after mode-1 reset on reload
author Ahmad Rehman <Ahmad.Rehman@amd.com>
Mon, 4 Mar 2024 21:56:00 +0000 (15:56 -0600)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 20 Mar 2024 17:12:57 +0000 (13:12 -0400)
In a passthrough environment, when amdgpu is reloaded after an unload, a mode-1
reset is triggered after initializing the necessary IPs. That init does not
include KFD, and KFD init waits until the reset is completed. KFD init
is called in the reset handler, but in this case the zone device and
drm client are not initialized, causing an app to trigger a kernel panic.

v2: Remove the KFD init condition from amdgpu_amdkfd_drm_client_create,
as the previous version has the potential of creating the DRM client twice.

v3: The v2 patch results in an SDMA engine hang, as DRM open causes a VM clear to SDMA
before SDMA init. Add a condition in drm client creation, on top of v1,
to guard against the drm client creation being called multiple times.

Signed-off-by: Ahmad Rehman <Ahmad.Rehman@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index f5f2945711be0c215d6a812d217c402616fc1cef..35dd6effa9a34a1be9ce83f83ab7915a38f6b4e8 100644 (file)
@@ -146,7 +146,7 @@ int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev)
 {
        int ret;
 
-       if (!adev->kfd.init_complete)
+       if (!adev->kfd.init_complete || adev->kfd.client.dev)
                return 0;
 
        ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
index 15b188aaf681805af714f0ee6d8d660e70ed81d3..80b9642f2bc4f25c69e9f30c70138f073e0c6cd2 100644 (file)
@@ -2479,8 +2479,11 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
        }
        for (i = 0; i < mgpu_info.num_dgpu; i++) {
                adev = mgpu_info.gpu_ins[i].adev;
-               if (!adev->kfd.init_complete)
+               if (!adev->kfd.init_complete) {
+                       kgd2kfd_init_zone_device(adev);
                        amdgpu_amdkfd_device_init(adev);
+                       amdgpu_amdkfd_drm_client_create(adev);
+               }
                amdgpu_ttm_set_buffer_funcs_status(adev, true);
        }
 }