RE: [PATCH] swiotlb: check set_memory_decrypted()'s return value
From: Dexuan Cui
Date: Wed Nov 23 2022 - 20:51:51 EST
> From: Robin Murphy <robin.murphy@xxxxxxx>
> Sent: Monday, November 21, 2022 12:49 PM
> > [...]
> > diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> > @@ -248,12 +248,16 @@ void __init swiotlb_update_mem_attributes(void)
> > struct io_tlb_mem *mem = &io_tlb_default_mem;
> > void *vaddr;
> > unsigned long bytes;
> > + int rc;
> >
> > if (!mem->nslabs || mem->late_alloc)
> > return;
> > vaddr = phys_to_virt(mem->start);
> > bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
> > - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
> > +
> > + rc = set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
> > + if (rc)
> > + panic("Failed to decrypt swiotlb bounce buffers (%d)\n", rc);
>
> Aww, I just cleaned up all the panics! AFAICS we could warn and set
Sorry, I didn't know about that... :-)
> mem->nslabs to 0 here to semi-gracefully disable SWIOTLB (presumably
> decryption failure is sufficiently unexpected that "leaking" the SWIOTLB
> memory is the least of the system's problems). Or indeed just warn and
> do nothing as in the swiotlb_init_late() case below - what's with the
> inconsistency? In either path we have the same expectation that
> decryption succeeds (or does nothing, successfully), so failure is no
> more or less fatal to later SWIOTLB operation depending on when the
> architecture chose to set it up.
I agree it's better to print an error message rather than panic here, but
the kernel will panic later anyway: e.g. when an AMD SEV-SNP guest runs
on Hyper-V, if this set_memory_decrypted() fails and we set
mem->nslabs to 0, we'll soon hit a panic when the storage driver
calls swiotlb_tbl_map_single():
"Kernel panic - not syncing: Can not allocate SWIOTLB buffer earlier and
can't now provide you with the DMA bounce buffer".
> So in 3 init paths we have 3 different outcomes from the same thing :(
>
> 1: make the whole system unusable
> 2: continue with possible data corruption (or at least weird DMA errors)
> if devices still see encrypted memory contents
> 3: cleanly disable SWIOTLB, thus also all subsequent attempts to use it
>
> (OK, for the rmem case this isn't actually 3 since falling back to
> io_tlb_default_mem might work out more like 2, but hopefully you get my
> point)
>
> Thanks,
> Robin.
How do you like this new version:
1) I removed the panic().
2) For swiotlb_update_mem_attributes() and swiotlb_init_late(), I print
an error message and disable swiotlb: the error seems so bad that IMO
we have to disable swiotlb.
3) No change to rmem_swiotlb_device_init(). The error in this function
doesn't seem fatal to me.
The bottom line is that set_memory_decrypted() should not fail silently
and cause weird issues later... BTW, normally IMO set_memory_decrypted()
doesn't fail, but I did see a failure because we're expected to retry
(the failure is fixed by
https://lwn.net/ml/linux-kernel/20221121195151.21812-3-decui%40microsoft.com/)
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -248,12 +248,20 @@ void __init swiotlb_update_mem_attributes(void)
struct io_tlb_mem *mem = &io_tlb_default_mem;
void *vaddr;
unsigned long bytes;
+ int rc;
if (!mem->nslabs || mem->late_alloc)
return;
vaddr = phys_to_virt(mem->start);
bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
- set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
+
+ rc = set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
+ if (rc) {
+ pr_err("Failed to decrypt swiotlb buffer (%d): disabling swiotlb!\n",
+ rc);
+ mem->nslabs = 0;
+ return;
+ }
mem->vaddr = swiotlb_mem_remap(mem, bytes);
if (!mem->vaddr)
@@ -391,7 +399,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
struct io_tlb_mem *mem = &io_tlb_default_mem;
unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
unsigned char *vstart = NULL;
- unsigned int order, area_order;
+ unsigned int order, area_order, slot_order;
bool retried = false;
int rc = 0;
@@ -442,19 +450,29 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
if (!mem->areas)
goto error_area;
+ slot_order = get_order(array_size(sizeof(*mem->slots), nslabs));
mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(array_size(sizeof(*mem->slots), nslabs)));
+ slot_order);
if (!mem->slots)
goto error_slots;
- set_memory_decrypted((unsigned long)vstart,
- (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
+ rc = set_memory_decrypted((unsigned long)vstart,
+ (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
+ if (rc) {
+ pr_err("Failed to decrypt swiotlb buffer (%d): disabling swiotlb!\n",
+ rc);
+ mem->nslabs = 0;
+ goto error_decrypted;
+ }
+
swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true,
default_nareas);
swiotlb_print_info();
return 0;
+error_decrypted:
+ free_pages((unsigned long)mem->slots, slot_order);
error_slots:
free_pages((unsigned long)mem->areas, area_order);
error_area:
@@ -986,6 +1004,7 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
/* Set Per-device io tlb area to one */
unsigned int nareas = 1;
+ int rc = -ENOMEM;
/*
* Since multiple devices can share the same pool, the private data,
@@ -998,21 +1017,22 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
return -ENOMEM;
mem->slots = kcalloc(nslabs, sizeof(*mem->slots), GFP_KERNEL);
- if (!mem->slots) {
- kfree(mem);
- return -ENOMEM;
- }
+ if (!mem->slots)
+ goto free_mem;
mem->areas = kcalloc(nareas, sizeof(*mem->areas),
GFP_KERNEL);
- if (!mem->areas) {
- kfree(mem->slots);
- kfree(mem);
- return -ENOMEM;
+ if (!mem->areas)
+ goto free_slots;
+
+ rc = set_memory_decrypted(
+ (unsigned long)phys_to_virt(rmem->base),
+ rmem->size >> PAGE_SHIFT);
+ if (rc) {
+ pr_err("Failed to decrypt rmem buffer (%d)\n", rc);
+ goto free_areas;
}
- set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
- rmem->size >> PAGE_SHIFT);
swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE,
false, nareas);
mem->for_alloc = true;
@@ -1025,6 +1045,14 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
dev->dma_io_tlb_mem = mem;
return 0;
+
+free_areas:
+ kfree(mem->areas);
+free_slots:
+ kfree(mem->slots);
+free_mem:
+ kfree(mem);
+ return rc;
}