mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-04-22 19:45:45 -04:00
netfs: Speed up buffered reading
Improve the efficiency of buffered reads in a number of ways:
(1) Overhaul the algorithm in general so that it's a lot more compact and
split the read submission code between buffered and unbuffered
versions. The unbuffered version can be vastly simplified.
(2) Read-result collection is handed off to a work queue rather than being
done in the I/O thread. Multiple subrequests can be processed
simultaneously.
(3) When a subrequest is collected, any folios it fully spans are
collected and "spare" data on either side is donated to either the
previous or the next subrequest in the sequence.
Notes:
(*) Readahead expansion massively slows down fio, presumably because it
causes a load of extra allocations, both folio and xarray, up front
before RPC requests can be transmitted.
(*) RDMA with cifs does appear to work, both with SIW and RXE.
(*) PG_private_2-based reading and copy-to-cache is split out into its own
file and altered to use folio_queue. Note that the copy to the cache
now creates a new write transaction against the cache and adds the
folios to be copied into it. This allows it to use part of the
writeback I/O code.
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20240814203850.2240469-20-dhowells@redhat.com/ # v2
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
committed by
Christian Brauner
parent
2e45b92297
commit
ee4cdf7ba8
@@ -27,6 +27,7 @@ struct folio_queue {
|
||||
struct folio_queue *prev; /* Previous queue segment or NULL */
|
||||
unsigned long marks; /* 1-bit mark per folio */
|
||||
unsigned long marks2; /* Second 1-bit mark per folio */
|
||||
unsigned long marks3; /* Third 1-bit mark per folio */
|
||||
#if PAGEVEC_SIZE > BITS_PER_LONG
|
||||
#error marks is not big enough
|
||||
#endif
|
||||
@@ -39,6 +40,7 @@ static inline void folioq_init(struct folio_queue *folioq)
|
||||
folioq->prev = NULL;
|
||||
folioq->marks = 0;
|
||||
folioq->marks2 = 0;
|
||||
folioq->marks3 = 0;
|
||||
}
|
||||
|
||||
static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq)
|
||||
@@ -87,6 +89,21 @@ static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot)
|
||||
clear_bit(slot, &folioq->marks2);
|
||||
}
|
||||
|
||||
/**
 * folioq_is_marked3 - Check the third mark on a slot of a folio queue segment
 * @folioq: The queue segment to query
 * @slot: The slot number within the segment
 *
 * Return: true if the bit for @slot is set in @folioq->marks3 (the third
 * 1-bit mark per folio), false otherwise.
 */
static inline bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot)
|
||||
{
|
||||
return test_bit(slot, &folioq->marks3);
|
||||
}
|
||||
|
||||
/**
 * folioq_mark3 - Set the third mark on a slot of a folio queue segment
 * @folioq: The queue segment to modify
 * @slot: The slot number within the segment
 *
 * Sets the bit for @slot in @folioq->marks3.  The first and second marks
 * (->marks, ->marks2) are not touched.
 */
static inline void folioq_mark3(struct folio_queue *folioq, unsigned int slot)
|
||||
{
|
||||
set_bit(slot, &folioq->marks3);
|
||||
}
|
||||
|
||||
/**
 * folioq_unmark3 - Clear the third mark on a slot of a folio queue segment
 * @folioq: The queue segment to modify
 * @slot: The slot number within the segment
 *
 * Clears the bit for @slot in @folioq->marks3.  folioq_clear() calls this,
 * along with the unmark helpers for the other two marks, when it empties a
 * slot.
 */
static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot)
|
||||
{
|
||||
clear_bit(slot, &folioq->marks3);
|
||||
}
|
||||
|
||||
static inline unsigned int __folio_order(struct folio *folio)
|
||||
{
|
||||
if (!folio_test_large(folio))
|
||||
@@ -133,6 +150,7 @@ static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot)
|
||||
folioq->vec.folios[slot] = NULL;
|
||||
folioq_unmark(folioq, slot);
|
||||
folioq_unmark2(folioq, slot);
|
||||
folioq_unmark3(folioq, slot);
|
||||
}
|
||||
|
||||
#endif /* _LINUX_FOLIO_QUEUE_H */
|
||||
|
||||
@@ -178,36 +178,43 @@ struct netfs_io_subrequest {
|
||||
unsigned long long start; /* Where to start the I/O */
|
||||
size_t len; /* Size of the I/O */
|
||||
size_t transferred; /* Amount of data transferred */
|
||||
size_t consumed; /* Amount of read data consumed */
|
||||
size_t prev_donated; /* Amount of data donated from previous subreq */
|
||||
size_t next_donated; /* Amount of data donated from next subreq */
|
||||
refcount_t ref;
|
||||
short error; /* 0 or error that occurred */
|
||||
unsigned short debug_index; /* Index in list (for debugging output) */
|
||||
unsigned int nr_segs; /* Number of segs in io_iter */
|
||||
enum netfs_io_source source; /* Where to read from/write to */
|
||||
unsigned char stream_nr; /* I/O stream this belongs to */
|
||||
unsigned char curr_folioq_slot; /* Folio currently being read */
|
||||
unsigned char curr_folio_order; /* Order of folio */
|
||||
struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */
|
||||
unsigned long flags;
|
||||
#define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */
|
||||
#define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */
|
||||
#define NETFS_SREQ_SHORT_IO 2 /* Set if the I/O was short */
|
||||
#define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */
|
||||
#define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */
|
||||
#define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */
|
||||
#define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */
|
||||
#define NETFS_SREQ_HIT_EOF 7 /* Set if short due to EOF */
|
||||
#define NETFS_SREQ_IN_PROGRESS 8 /* Unlocked when the subrequest completes */
|
||||
#define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */
|
||||
#define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */
|
||||
#define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */
|
||||
#define NETFS_SREQ_HIT_EOF 12 /* Set if we hit the EOF */
|
||||
};
|
||||
|
||||
/*
 * What initiated an I/O request; a value of this type is held in the ->origin
 * member of struct netfs_io_request.
 *
 * nr__netfs_io_origin is not an origin in its own right: as the final
 * enumerator with no explicit values assigned, it equals the number of
 * origins listed before it.
 *
 * NOTE(review): __mode(byte) presumably narrows the enum's storage to a
 * single byte - confirm against the macro's definition.
 */
enum netfs_io_origin {
|
||||
NETFS_READAHEAD, /* This read was triggered by readahead */
|
||||
NETFS_READPAGE, /* This read is a synchronous read */
|
||||
NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */
|
||||
NETFS_READ_FOR_WRITE, /* This read is to prepare a write */
|
||||
NETFS_DIO_READ, /* This is a direct I/O read */
|
||||
NETFS_WRITEBACK, /* This write was triggered by writepages */
|
||||
NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */
|
||||
NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */
|
||||
NETFS_DIO_WRITE, /* This is a direct I/O write */
|
||||
NETFS_PGPRIV2_COPY_TO_CACHE, /* [DEPRECATED] This is writing read data to the cache */
|
||||
nr__netfs_io_origin
|
||||
} __mode(byte);
|
||||
|
||||
@@ -224,6 +231,7 @@ struct netfs_io_request {
|
||||
struct address_space *mapping; /* The mapping being accessed */
|
||||
struct kiocb *iocb; /* AIO completion vector */
|
||||
struct netfs_cache_resources cache_resources;
|
||||
struct readahead_control *ractl; /* Readahead descriptor */
|
||||
struct list_head proc_link; /* Link in netfs_iorequests */
|
||||
struct list_head subrequests; /* Contributory I/O operations */
|
||||
struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */
|
||||
@@ -244,12 +252,10 @@ struct netfs_io_request {
|
||||
unsigned int nr_group_rel; /* Number of refs to release on ->group */
|
||||
spinlock_t lock; /* Lock for queuing subreqs */
|
||||
atomic_t nr_outstanding; /* Number of ops in progress */
|
||||
atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */
|
||||
size_t upper_len; /* Length can be extended to here */
|
||||
unsigned long long submitted; /* Amount submitted for I/O so far */
|
||||
unsigned long long len; /* Length of the request */
|
||||
size_t transferred; /* Amount to be indicated as transferred */
|
||||
short error; /* 0 or error that occurred */
|
||||
long error; /* 0 or error that occurred */
|
||||
enum netfs_io_origin origin; /* Origin of the request */
|
||||
bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */
|
||||
u8 buffer_head_slot; /* First slot in ->buffer */
|
||||
@@ -260,9 +266,9 @@ struct netfs_io_request {
|
||||
unsigned long long collected_to; /* Point we've collected to */
|
||||
unsigned long long cleaned_to; /* Position we've cleaned folios to */
|
||||
pgoff_t no_unlock_folio; /* Don't unlock this folio after read */
|
||||
size_t prev_donated; /* Fallback for subreq->prev_donated */
|
||||
refcount_t ref;
|
||||
unsigned long flags;
|
||||
#define NETFS_RREQ_INCOMPLETE_IO 0 /* Some ioreqs terminated short or with error */
|
||||
#define NETFS_RREQ_COPY_TO_CACHE 1 /* Need to write to the cache */
|
||||
#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */
|
||||
#define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */
|
||||
@@ -274,6 +280,7 @@ struct netfs_io_request {
|
||||
#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */
|
||||
#define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */
|
||||
#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */
|
||||
#define NETFS_RREQ_NEED_RETRY 14 /* Need to try retrying */
|
||||
#define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark
|
||||
* write to cache on read */
|
||||
const struct netfs_request_ops *netfs_ops;
|
||||
@@ -292,7 +299,7 @@ struct netfs_request_ops {
|
||||
|
||||
/* Read request handling */
|
||||
void (*expand_readahead)(struct netfs_io_request *rreq);
|
||||
bool (*clamp_length)(struct netfs_io_subrequest *subreq);
|
||||
int (*prepare_read)(struct netfs_io_subrequest *subreq);
|
||||
void (*issue_read)(struct netfs_io_subrequest *subreq);
|
||||
bool (*is_still_valid)(struct netfs_io_request *rreq);
|
||||
int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
|
||||
@@ -422,7 +429,10 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp);
|
||||
vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group);
|
||||
|
||||
/* (Sub)request management API. */
|
||||
void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool);
|
||||
void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq,
|
||||
bool was_async);
|
||||
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
|
||||
int error, bool was_async);
|
||||
void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
|
||||
enum netfs_sreq_ref_trace what);
|
||||
void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
|
||||
|
||||
Reference in New Issue
Block a user