Re: [PATCH v6 6/6] zsmalloc: Implement writeback mechanism for zsmalloc

From: Johannes Weiner
Date: Wed Nov 23 2022 - 12:18:44 EST


On Tue, Nov 22, 2022 at 03:37:29PM +0900, Sergey Senozhatsky wrote:
> On (22/11/18 16:15), Nhat Pham wrote:
> [..]
> > +static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries)
> > +{
> > +	int i, obj_idx, ret = 0;
> > +	unsigned long handle;
> > +	struct zspage *zspage;
> > +	struct page *page;
> > +	enum fullness_group fullness;
> > +
> > +	/* Lock LRU and fullness list */
> > +	spin_lock(&pool->lock);
> > +	if (list_empty(&pool->lru)) {
> > +		spin_unlock(&pool->lock);
> > +		return -EINVAL;
> > +	}
> > +
> > +	for (i = 0; i < retries; i++) {
> > +		struct size_class *class;
> > +
> > +		zspage = list_last_entry(&pool->lru, struct zspage, lru);
> > +		list_del(&zspage->lru);
> > +
> > +		/* zs_free may free objects, but not the zspage and handles */
> > +		zspage->under_reclaim = true;
> > +
> > +		class = zspage_class(pool, zspage);
> > +		fullness = get_fullness_group(class, zspage);
> > +
> > +		/* Lock out object allocations and object compaction */
> > +		remove_zspage(class, zspage, fullness);
> > +
> > +		spin_unlock(&pool->lock);
> > +
> > +		/* Lock backing pages into place */
> > +		lock_zspage(zspage);
> > +
> > +		obj_idx = 0;
> > +		page = zspage->first_page;
> > +		while (1) {
> > +			handle = find_alloced_obj(class, page, &obj_idx);
> > +			if (!handle) {
> > +				page = get_next_page(page);
> > +				if (!page)
> > +					break;
> > +				obj_idx = 0;
> > +				continue;
> > +			}
> > +
> > +			/*
> > +			 * This will write the object and call zs_free.
> > +			 *
> > +			 * zs_free will free the object, but the
> > +			 * under_reclaim flag prevents it from freeing
> > +			 * the zspage altogether. This is necessary so
> > +			 * that we can continue working with the
> > +			 * zspage potentially after the last object
> > +			 * has been freed.
> > +			 */
> > +			ret = pool->zpool_ops->evict(pool->zpool, handle);
> > +			if (ret)
> > +				goto next;
> > +
> > +			obj_idx++;
> > +		}
> > +
> > +next:
> > +		/* For freeing the zspage, or putting it back in the pool and LRU list. */
> > +		spin_lock(&pool->lock);
> > +		zspage->under_reclaim = false;
> > +
> > +		if (!get_zspage_inuse(zspage)) {
> > +			/*
> > +			 * Fullness went stale as zs_free() won't touch it
> > +			 * while the page is removed from the pool. Fix it
> > +			 * up for the check in __free_zspage().
> > +			 */
> > +			zspage->fullness = ZS_EMPTY;
> > +
> > +			__free_zspage(pool, class, zspage);
> > +			spin_unlock(&pool->lock);
> > +			return 0;
> > +		}
> > +
> > +		putback_zspage(class, zspage);
> > +		list_add(&zspage->lru, &pool->lru);
> > +		unlock_zspage(zspage);
>
> We'd probably be better off doing a cond_resched() somewhere here, or in the
> zs_zpool_shrink() loop.

Hm, yeah I suppose that could make sense if we try more than one page.

We always hold either the pool lock or the page locks, and we probably
don't want to schedule with the page locks held. So it would need an
actual lock break on the pool lock. And then somebody can steal the page
and empty the LRU under us, so we need to re-check that on every loop
iteration, too.

Something like this?

	for (i = 0; i < retries; i++) {
		spin_lock(&pool->lock);
		if (list_empty(&pool->lru)) {
			spin_unlock(&pool->lock);
			return -EINVAL;
		}
		zspage = list_last_entry(&pool->lru, ...);

		...

		putback_zspage(class, zspage);
		list_add(&zspage->lru, &pool->lru);
		unlock_zspage(zspage);
		spin_unlock(&pool->lock);

		cond_resched();
	}
	return -EAGAIN;
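
The other placement Sergey mentions would be the zs_zpool_shrink() loop
itself. A rough sketch of that variant, assuming the shrink callback follows
the usual zpool shrink shape (as in zbud/z3fold) and just calls
zs_reclaim_page(); the retry count of 8 is illustrative:

	static int zs_zpool_shrink(void *pool, unsigned int pages,
				   unsigned int *reclaimed)
	{
		unsigned int total = 0;
		int ret = -EINVAL;

		while (total < pages) {
			/* zs_reclaim_page() returns with all locks dropped */
			ret = zs_reclaim_page(pool, 8);
			if (ret < 0)
				break;
			total++;

			/* No locks held here, so yielding is safe */
			cond_resched();
		}

		if (reclaimed)
			*reclaimed = total;

		return ret;
	}

That avoids the lock break inside zs_reclaim_page(), since nothing is held
between successive reclaim attempts.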