Merge branch 'for-3.5/core' of git://git.kernel.dk/linux-block

Merge block/IO core bits from Jens Axboe:
 "This is a bit bigger on the core side than usual, but that is purely
  because we decided to hold off on parts of Tejun's submission for 3.4
  to give it a bit more time to simmer.  As a consequence, it's seen a
  long cycle in for-next.

  It contains:

   - A bug fix from Dan for a wrong locking type.
   - A relaxed splice gifting restriction from Eric.
   - A ton of updates from Tejun, primarily for blkcg.  This improves
     the code a lot, making the API nicer and cleaner.  It also includes
     fixes for how we handle and tie policies, and how we re-activate
     them on switches.  The changes also include generic bug fixes.
   - A simple fix from Vivek, along with a fix for doing proper delayed
     allocation of the blkcg stats."

Fix up an annoying conflict that was just due to a different merge
resolution in Documentation/feature-removal-schedule.txt

* 'for-3.5/core' of git://git.kernel.dk/linux-block: (92 commits)
  blkcg: tg_stats_alloc_lock is an irq lock
  vmsplice: relax alignment requirements for SPLICE_F_GIFT
  blkcg: use radix tree to index blkgs from blkcg
  blkcg: fix blkcg->css ref leak in __blkg_lookup_create()
  block: fix elvpriv allocation failure handling
  block: collapse blk_alloc_request() into get_request()
  blkcg: collapse blkcg_policy_ops into blkcg_policy
  blkcg: embed struct blkg_policy_data in policy specific data
  blkcg: mass rename of blkcg API
  blkcg: style cleanups for blk-cgroup.h
  blkcg: remove blkio_group->path[]
  blkcg: blkg_rwstat_read() was missing inline
  blkcg: shoot down blkgs if all policies are deactivated
  blkcg: drop stuff unused after per-queue policy activation update
  blkcg: implement per-queue policy activation
  blkcg: add request_queue->root_blkg
  blkcg: make request_queue bypassing on allocation
  blkcg: make sure blkg_lookup() returns %NULL if @q is bypassing
  blkcg: make blkg_conf_prep() take @pol and return with queue lock held
  blkcg: remove static policy ID enums
  ...
Linus Torvalds committed on 2012-05-30 08:52:42 -07:00
24 changed files with 2531 additions and 3037 deletions
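
The headline change in the diffs below is per-queue blkcg policy
activation: rather than static policy IDs compiled into the core (see
"blkcg: remove static policy ID enums" in the shortlog), a policy is
now enabled and disabled per request_queue, tracked in the blkcg_pols
bitmap added to struct request_queue.  As a rough sketch of the two
entry points the series introduces in block/blk-cgroup.[ch] (prototypes
as of this merge, shown for orientation rather than as a stable API):

	int blkcg_activate_policy(struct request_queue *q,
				  const struct blkcg_policy *pol);
	void blkcg_deactivate_policy(struct request_queue *q,
				     const struct blkcg_policy *pol);

Activation bypasses the queue, allocates per-queue policy data for its
blkgs and sets the policy's bit; deactivation clears the bit and frees
the data.  Once all policy bits are clear, the queue's blkgs can be
shot down (per the shortlog above).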

diff --git a/include/linux/bio.h b/include/linux/bio.h

@@ -269,6 +269,14 @@ extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set
 extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
 extern unsigned int bvec_nr_vecs(unsigned short idx);
 
+#ifdef CONFIG_BLK_CGROUP
+int bio_associate_current(struct bio *bio);
+void bio_disassociate_task(struct bio *bio);
+#else	/* CONFIG_BLK_CGROUP */
+static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
+static inline void bio_disassociate_task(struct bio *bio) { }
+#endif	/* CONFIG_BLK_CGROUP */
+
 /*
  * bio_set is used to allow other portions of the IO system to
  * allocate their own private memory pools for bio and iovec structures.
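
bio_associate_current() exists so a bio can be charged to the task that
built it rather than whichever thread eventually submits it (loop- and
writeback-style offloading).  A minimal, hypothetical caller, assuming
the 3.5-era submit_bio(rw, bio) signature:

	static void submit_on_behalf_of_current(struct bio *bio)
	{
		/*
		 * Pin current's io_context and blkcg css to the bio.  The
		 * !CONFIG_BLK_CGROUP stub above returns -ENOENT, and the
		 * real implementation returns -EBUSY if the bio is already
		 * associated, so failure just means default attribution.
		 */
		if (bio_associate_current(bio) < 0)
			pr_debug("bio attributed to submitter instead\n");
		submit_bio(WRITE, bio);
	}

The references taken here are dropped by bio_disassociate_task() when
the bio is released.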

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h

@@ -14,6 +14,8 @@ struct bio;
 struct bio_integrity_payload;
 struct page;
 struct block_device;
+struct io_context;
+struct cgroup_subsys_state;
 
 typedef void (bio_end_io_t) (struct bio *, int);
 typedef void (bio_destructor_t) (struct bio *);
@@ -66,6 +68,14 @@ struct bio {
 	bio_end_io_t		*bi_end_io;
 
 	void			*bi_private;
+#ifdef CONFIG_BLK_CGROUP
+	/*
+	 * Optional ioc and css associated with this bio.  Put on bio
+	 * release.  Read comment on top of bio_associate_current().
+	 */
+	struct io_context	*bi_ioc;
+	struct cgroup_subsys_state *bi_css;
+#endif
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 	struct bio_integrity_payload *bi_integrity;  /* data integrity */
 #endif
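
The comment on bi_ioc/bi_css above says the references are put on bio
release; the release side is the bio_disassociate_task() pairing in
fs/bio.c, roughly (a sketch, not verbatim):

	void bio_disassociate_task(struct bio *bio)
	{
		if (bio->bi_ioc) {
			put_io_context(bio->bi_ioc);	/* drop ioc ref */
			bio->bi_ioc = NULL;
		}
		if (bio->bi_css) {
			css_put(bio->bi_css);		/* drop cgroup css ref */
			bio->bi_css = NULL;
		}
	}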

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h

@@ -32,10 +32,17 @@ struct blk_trace;
 struct request;
 struct sg_io_hdr;
 struct bsg_job;
+struct blkcg_gq;
 
 #define BLKDEV_MIN_RQ	4
 #define BLKDEV_MAX_RQ	128	/* Default maximum */
 
+/*
+ * Maximum number of blkcg policies allowed to be registered concurrently.
+ * Defined here to simplify include dependency.
+ */
+#define BLKCG_MAX_POLS	2
+
 struct request;
 typedef void (rq_end_io_fn)(struct request *, int);
@@ -363,6 +370,11 @@ struct request_queue {
 	struct list_head	timeout_list;
 
 	struct list_head	icq_list;
+#ifdef CONFIG_BLK_CGROUP
+	DECLARE_BITMAP		(blkcg_pols, BLKCG_MAX_POLS);
+	struct blkcg_gq		*root_blkg;
+	struct list_head	blkg_list;
+#endif
 
 	struct queue_limits	limits;
@@ -390,12 +402,17 @@ struct request_queue {
 	struct mutex		sysfs_lock;
 
+	int			bypass_depth;
+
 #if defined(CONFIG_BLK_DEV_BSG)
 	bsg_job_fn		*bsg_job_fn;
 	int			bsg_job_size;
 	struct bsg_class_device bsg_dev;
 #endif
 
+#ifdef CONFIG_BLK_CGROUP
+	struct list_head	all_q_node;
+#endif
 #ifdef CONFIG_BLK_DEV_THROTTLING
 	/* Throttle data */
 	struct throtl_data *td;
@@ -407,7 +424,7 @@ struct request_queue {
 #define QUEUE_FLAG_SYNCFULL	3	/* read queue has been filled */
 #define QUEUE_FLAG_ASYNCFULL	4	/* write queue has been filled */
 #define QUEUE_FLAG_DEAD		5	/* queue being torn down */
-#define QUEUE_FLAG_ELVSWITCH	6	/* don't use elevator, just do FIFO */
+#define QUEUE_FLAG_BYPASS	6	/* act as dumb FIFO queue */
 #define QUEUE_FLAG_BIDI		7	/* queue supports bidi requests */
 #define QUEUE_FLAG_NOMERGES	8	/* disable merge attempts */
 #define QUEUE_FLAG_SAME_COMP	9	/* complete on same CPU-group */
@@ -491,6 +508,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_dead(q)	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
+#define blk_queue_bypass(q)	test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_noxmerges(q)	\
 	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
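
The blkcg_pols bitmap is what makes activation per-queue: each
registered policy owns one bit (hence BLKCG_MAX_POLS), and hot paths
test it before touching per-policy data.  Modeled on the
blkcg_policy_enabled() helper this series adds in block/blk-cgroup.c
(assuming pol->plid is the policy's bit index):

	static bool blkcg_policy_enabled(struct request_queue *q,
					 const struct blkcg_policy *pol)
	{
		return pol && test_bit(pol->plid, q->blkcg_pols);
	}

QUEUE_FLAG_BYPASS generalizes the old ELVSWITCH flag: while
blk_queue_bypass(q) is true, requests skip the elevator and
blkg_lookup() returns %NULL (see the shortlog), which is what lets
policies be switched safely underneath a live queue.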

diff --git a/include/linux/elevator.h b/include/linux/elevator.h

@@ -28,12 +28,13 @@ typedef int (elevator_may_queue_fn) (struct request_queue *, int);
 typedef void (elevator_init_icq_fn) (struct io_cq *);
 typedef void (elevator_exit_icq_fn) (struct io_cq *);
-typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t);
+typedef int (elevator_set_req_fn) (struct request_queue *, struct request *,
+				   struct bio *, gfp_t);
 typedef void (elevator_put_req_fn) (struct request *);
 typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
 typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
 
-typedef void *(elevator_init_fn) (struct request_queue *);
+typedef int (elevator_init_fn) (struct request_queue *);
 typedef void (elevator_exit_fn) (struct elevator_queue *);
 
 struct elevator_ops
@@ -129,7 +130,8 @@ extern void elv_unregister_queue(struct request_queue *q);
 extern int elv_may_queue(struct request_queue *, int);
 extern void elv_abort_queue(struct request_queue *);
 extern void elv_completed_request(struct request_queue *, struct request *);
-extern int elv_set_request(struct request_queue *, struct request *, gfp_t);
+extern int elv_set_request(struct request_queue *q, struct request *rq,
+			   struct bio *bio, gfp_t gfp_mask);
 extern void elv_put_request(struct request_queue *, struct request *);
 extern void elv_drain_elevator(struct request_queue *);
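
The elevator_set_req_fn change hands the bio to the elevator at request
allocation time, so it can resolve which blkcg to charge before the
request is set up; elevator_init_fn returning an int lets
initialization fail with -errno instead of returning a bare data
pointer.  A hypothetical no-op hook with the new signature:

	static int dummy_set_request(struct request_queue *q,
				     struct request *rq, struct bio *bio,
				     gfp_t gfp_mask)
	{
		/* bio may be NULL, e.g. for blk_get_request() callers */
		rq->elv.priv[0] = NULL;	/* no per-request elevator data */
		return 0;		/* 0 on success, -errno on failure */
	}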

diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h

@@ -6,11 +6,7 @@
 #include <linux/workqueue.h>
 
 enum {
-	ICQ_IOPRIO_CHANGED	= 1 << 0,
-	ICQ_CGROUP_CHANGED	= 1 << 1,
 	ICQ_EXITED		= 1 << 2,
-
-	ICQ_CHANGED_MASK	= ICQ_IOPRIO_CHANGED | ICQ_CGROUP_CHANGED,
 };
 
 /*
@@ -100,6 +96,7 @@ struct io_cq {
  */
 struct io_context {
 	atomic_long_t refcount;
+	atomic_t active_ref;
 	atomic_t nr_tasks;
 
 	/* all the fields below are protected by this lock */
@@ -120,29 +117,37 @@ struct io_context {
 	struct work_struct release_work;
 };
 
-static inline struct io_context *ioc_task_link(struct io_context *ioc)
-{
-	/*
-	 * if ref count is zero, don't allow sharing (ioc is going away, it's
-	 * a race).
-	 */
-	if (ioc && atomic_long_inc_not_zero(&ioc->refcount)) {
-		atomic_inc(&ioc->nr_tasks);
-		return ioc;
-	}
+/**
+ * get_io_context_active - get active reference on ioc
+ * @ioc: ioc of interest
+ *
+ * Only iocs with active reference can issue new IOs.  This function
+ * acquires an active reference on @ioc.  The caller must already have an
+ * active reference on @ioc.
+ */
+static inline void get_io_context_active(struct io_context *ioc)
+{
+	WARN_ON_ONCE(atomic_long_read(&ioc->refcount) <= 0);
+	WARN_ON_ONCE(atomic_read(&ioc->active_ref) <= 0);
+	atomic_long_inc(&ioc->refcount);
+	atomic_inc(&ioc->active_ref);
+}
 
-	return NULL;
+static inline void ioc_task_link(struct io_context *ioc)
+{
+	get_io_context_active(ioc);
+
+	WARN_ON_ONCE(atomic_read(&ioc->nr_tasks) <= 0);
+	atomic_inc(&ioc->nr_tasks);
 }
 
 struct task_struct;
 #ifdef CONFIG_BLOCK
 void put_io_context(struct io_context *ioc);
+void put_io_context_active(struct io_context *ioc);
 void exit_io_context(struct task_struct *task);
 struct io_context *get_task_io_context(struct task_struct *task,
 				       gfp_t gfp_flags, int node);
-void ioc_ioprio_changed(struct io_context *ioc, int ioprio);
-void ioc_cgroup_changed(struct io_context *ioc);
-unsigned int icq_get_changed(struct io_cq *icq);
 #else
 struct io_context;
 static inline void put_io_context(struct io_context *ioc) { }
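
active_ref separates "may still issue new IOs" from plain object
lifetime: an exiting task drops its active reference while the ioc
itself lives on until the last refcount is put.  A hedged usage sketch
(it assumes the caller, like current here, already holds an active ref,
as the kerneldoc above requires):

	struct io_context *ioc;

	ioc = get_task_io_context(current, GFP_KERNEL, NUMA_NO_NODE);
	if (ioc) {
		get_io_context_active(ioc);  /* +1 refcount, +1 active_ref */
		/* ... issue IO on behalf of ioc ... */
		put_io_context_active(ioc);  /* -1 active_ref, -1 refcount */
		put_io_context(ioc);	     /* ref from get_task_io_context() */
	}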

diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h

@@ -41,27 +41,15 @@ enum {
 	IOPRIO_WHO_USER,
 };
 
-/*
- * if process has set io priority explicitly, use that. if not, convert
- * the cpu scheduler nice value to an io priority
- */
+/*
+ * Fallback BE priority
+ */
 #define IOPRIO_NORM	(4)
-static inline int task_ioprio(struct io_context *ioc)
-{
-	if (ioprio_valid(ioc->ioprio))
-		return IOPRIO_PRIO_DATA(ioc->ioprio);
-
-	return IOPRIO_NORM;
-}
-
-static inline int task_ioprio_class(struct io_context *ioc)
-{
-	if (ioprio_valid(ioc->ioprio))
-		return IOPRIO_PRIO_CLASS(ioc->ioprio);
-
-	return IOPRIO_CLASS_BE;
-}
-
+
 static inline int task_nice_ioprio(struct task_struct *task)
 {
 	return (task_nice(task) + 20) / 5;
 }
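
With task_ioprio() and task_ioprio_class() gone, task_nice_ioprio() is
the remaining nice-to-ioprio fallback.  Worked through, (task_nice(task)
+ 20) / 5 folds the nice range -20..19 onto the eight best-effort
levels 0..7: nice -20 maps to priority 0 (highest), nice 0 to 4
(IOPRIO_NORM), and nice 19 to 7 (lowest).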