erofs: support to readahead dirent blocks in erofs_readdir()

This patch adds readahead of more blocks in erofs_readdir(), which can
enhance readdir performance in large directories.

readdir test in a large directory which contains 12000 sub-files.

		files_per_second
Before:		926385.54
After:		2380435.562

Meanwhile, let's introduce a new sysfs entry to control the readahead
bytes, providing a more flexible readahead policy for readdir().
- location: /sys/fs/erofs/<disk>/dir_ra_bytes
- default value: 16384
- disable readahead: set the value to 0

Signed-off-by: Chao Yu <chao@kernel.org>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20250721021352.2495371-1-chao@kernel.org
[ Gao Xiang: minor styling adjustment. ]
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
This commit is contained in:
Chao Yu
2025-07-21 10:13:52 +08:00
committed by Gao Xiang
parent 414091322c
commit df0ce6cefa
5 changed files with 29 additions and 0 deletions

View File

@@ -35,3 +35,11 @@ Description: Used to set or show hardware accelerators in effect
and multiple accelerators are separated by '\n'.
Supported accelerator(s): qat_deflate.
Disable all accelerators with an empty string (echo > accel).
What: /sys/fs/erofs/<disk>/dir_ra_bytes
Date: July 2025
Contact: "Chao Yu" <chao@kernel.org>
Description: Used to set or show readahead bytes during readdir(). By
default the value is 16384.
- 0: disable readahead.

View File

@@ -48,8 +48,12 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
struct inode *dir = file_inode(f);
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct super_block *sb = dir->i_sb;
struct file_ra_state *ra = &f->f_ra;
unsigned long bsz = sb->s_blocksize;
unsigned int ofs = erofs_blkoff(sb, ctx->pos);
pgoff_t ra_pages = DIV_ROUND_UP_POW2(
EROFS_I_SB(dir)->dir_ra_bytes, PAGE_SIZE);
pgoff_t nr_pages = DIV_ROUND_UP_POW2(dir->i_size, PAGE_SIZE);
int err = 0;
bool initial = true;
@@ -64,6 +68,16 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
break;
}
/* readahead blocks to enhance performance for large directories */
if (ra_pages) {
pgoff_t idx = DIV_ROUND_UP_POW2(ctx->pos, PAGE_SIZE);
pgoff_t pages = min(nr_pages - idx, ra_pages);
if (pages > 1 && !ra_has_index(ra, idx))
page_cache_sync_readahead(dir->i_mapping, ra,
f, idx, pages);
}
de = erofs_bread(&buf, dbstart, true);
if (IS_ERR(de)) {
erofs_err(sb, "failed to readdir of logical block %llu of nid %llu",

View File

@@ -159,6 +159,7 @@ struct erofs_sb_info {
/* sysfs support */
struct kobject s_kobj; /* /sys/fs/erofs/<devname> */
struct completion s_kobj_unregister;
erofs_off_t dir_ra_bytes;
/* fscache support */
struct fscache_volume *volume;
@@ -259,6 +260,9 @@ static inline u64 erofs_nid_to_ino64(struct erofs_sb_info *sbi, erofs_nid_t nid)
#define EROFS_I_BL_XATTR_BIT (BITS_PER_LONG - 1)
#define EROFS_I_BL_Z_BIT (BITS_PER_LONG - 2)
/*
 * default readahead size of directories, in bytes; used as the initial
 * value of sbi->dir_ra_bytes at mount time
 */
#define EROFS_DIR_RA_BYTES 16384
struct erofs_inode {
erofs_nid_t nid;

View File

@@ -731,6 +731,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
if (err)
return err;
sbi->dir_ra_bytes = EROFS_DIR_RA_BYTES;
erofs_info(sb, "mounted with root inode @ nid %llu.", sbi->root_nid);
return 0;
}

View File

@@ -65,12 +65,14 @@ EROFS_ATTR_FUNC(drop_caches, 0200);
#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
EROFS_ATTR_FUNC(accel, 0644);
#endif
/*
 * Read-write sysfs attribute backed by erofs_sb_info.dir_ra_bytes
 * (/sys/fs/erofs/<disk>/dir_ra_bytes).
 * NOTE(review): the field is declared as erofs_off_t while this uses the
 * _UI helper (presumably unsigned int) -- confirm the access width matches.
 */
EROFS_ATTR_RW_UI(dir_ra_bytes, erofs_sb_info);
/*
 * NULL-terminated attribute list for the erofs_sb attribute group.
 * sync_decompress and drop_caches are only listed when CONFIG_EROFS_FS_ZIP
 * is set; dir_ra_bytes is listed unconditionally.
 */
static struct attribute *erofs_sb_attrs[] = {
#ifdef CONFIG_EROFS_FS_ZIP
ATTR_LIST(sync_decompress),
ATTR_LIST(drop_caches),
#endif
ATTR_LIST(dir_ra_bytes),
NULL,
};
ATTRIBUTE_GROUPS(erofs_sb);