1 files changed, 0 insertions, 429 deletions
diff --git a/target/linux/brcm2708/patches-4.4/0290-drm-vc4-improve-throughput-by-pipelining-binning-and.patch b/target/linux/brcm2708/patches-4.4/0290-drm-vc4-improve-throughput-by-pipelining-binning-and.patch
deleted file mode 100644
index addef40932..0000000000
--- a/target/linux/brcm2708/patches-4.4/0290-drm-vc4-improve-throughput-by-pipelining-binning-and.patch
+++ /dev/null
@@ -1,429 +0,0 @@
-From c661b60a4057da2d212fc5e8e2a56d463d912974 Mon Sep 17 00:00:00 2001
-From: Varad Gautam <varadgautam@gmail.com>
-Date: Wed, 17 Feb 2016 19:08:21 +0530
-Subject: [PATCH] drm/vc4: improve throughput by pipelining binning and
- rendering jobs
-
-The hardware provides us with separate threads for binning and
-rendering, and the existing model waits for them both to complete
-before submitting the next job.
-
-Splitting the binning and rendering submissions reduces idle time and
-gives us approx 20-30% speedup with some x11perf tests such as -line10
-and -tilerect1.  Improves openarena performance by 1.01897% +/-
-0.247857% (n=16).
-
-Thanks to anholt for suggesting this.
-
-v2: Rebase on the spurious resets fix (change by anholt).
-
-Signed-off-by: Varad Gautam <varadgautam@gmail.com>
-Reviewed-by: Eric Anholt <eric@anholt.net>
-Signed-off-by: Eric Anholt <eric@anholt.net>
-(cherry picked from commit ca26d28bbaa39f31d5e7e4812603b015c8d54207)
----
- drivers/gpu/drm/vc4/vc4_drv.h |  37 +++++++++----
- drivers/gpu/drm/vc4/vc4_gem.c | 123 ++++++++++++++++++++++++++++++------------
- drivers/gpu/drm/vc4/vc4_irq.c |  58 ++++++++++++++++----
- 3 files changed, 166 insertions(+), 52 deletions(-)
-
---- a/drivers/gpu/drm/vc4/vc4_drv.h
-+++ b/drivers/gpu/drm/vc4/vc4_drv.h
-@@ -53,7 +53,7 @@ struct vc4_dev {
- 	/* Protects bo_cache and the BO stats. */
- 	struct mutex bo_lock;
- 
--	/* Sequence number for the last job queued in job_list.
-+	/* Sequence number for the last job queued in bin_job_list.
- 	 * Starts at 0 (no jobs emitted).
- 	 */
- 	uint64_t emit_seqno;
-@@ -63,11 +63,19 @@ struct vc4_dev {
- 	 */
- 	uint64_t finished_seqno;
- 
--	/* List of all struct vc4_exec_info for jobs to be executed.
--	 * The first job in the list is the one currently programmed
--	 * into ct0ca/ct1ca for execution.
-+	/* List of all struct vc4_exec_info for jobs to be executed in
-+	 * the binner.  The first job in the list is the one currently
-+	 * programmed into ct0ca for execution.
-+	 */
-+	struct list_head bin_job_list;
-+
-+	/* List of all struct vc4_exec_info for jobs that have
-+	 * completed binning and are ready for rendering.  The first
-+	 * job in the list is the one currently programmed into ct1ca
-+	 * for execution.
- 	 */
--	struct list_head job_list;
-+	struct list_head render_job_list;
-+
- 	/* List of the finished vc4_exec_infos waiting to be freed by
- 	 * job_done_work.
- 	 */
-@@ -291,11 +299,20 @@ struct vc4_exec_info {
- };
- 
- static inline struct vc4_exec_info *
--vc4_first_job(struct vc4_dev *vc4)
-+vc4_first_bin_job(struct vc4_dev *vc4)
-+{
-+	if (list_empty(&vc4->bin_job_list))
-+		return NULL;
-+	return list_first_entry(&vc4->bin_job_list, struct vc4_exec_info, head);
-+}
-+
-+static inline struct vc4_exec_info *
-+vc4_first_render_job(struct vc4_dev *vc4)
- {
--	if (list_empty(&vc4->job_list))
-+	if (list_empty(&vc4->render_job_list))
- 		return NULL;
--	return list_first_entry(&vc4->job_list, struct vc4_exec_info, head);
-+	return list_first_entry(&vc4->render_job_list,
-+				struct vc4_exec_info, head);
- }
- 
- /**
-@@ -410,7 +427,9 @@ int vc4_wait_seqno_ioctl(struct drm_devi
- 			 struct drm_file *file_priv);
- int vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
- 		      struct drm_file *file_priv);
--void vc4_submit_next_job(struct drm_device *dev);
-+void vc4_submit_next_bin_job(struct drm_device *dev);
-+void vc4_submit_next_render_job(struct drm_device *dev);
-+void vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec);
- int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
- 		       uint64_t timeout_ns, bool interruptible);
- void vc4_job_handle_completed(struct vc4_dev *vc4);
---- a/drivers/gpu/drm/vc4/vc4_gem.c
-+++ b/drivers/gpu/drm/vc4/vc4_gem.c
-@@ -154,10 +154,10 @@ vc4_save_hang_state(struct drm_device *d
- 	struct vc4_dev *vc4 = to_vc4_dev(dev);
- 	struct drm_vc4_get_hang_state *state;
- 	struct vc4_hang_state *kernel_state;
--	struct vc4_exec_info *exec;
-+	struct vc4_exec_info *exec[2];
- 	struct vc4_bo *bo;
- 	unsigned long irqflags;
--	unsigned int i, unref_list_count;
-+	unsigned int i, j, unref_list_count, prev_idx;
- 
- 	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
- 	if (!kernel_state)
-@@ -166,37 +166,55 @@ vc4_save_hang_state(struct drm_device *d
- 	state = &kernel_state->user_state;
- 
- 	spin_lock_irqsave(&vc4->job_lock, irqflags);
--	exec = vc4_first_job(vc4);
--	if (!exec) {
-+	exec[0] = vc4_first_bin_job(vc4);
-+	exec[1] = vc4_first_render_job(vc4);
-+	if (!exec[0] && !exec[1]) {
- 		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
- 		return;
- 	}
- 
--	unref_list_count = 0;
--	list_for_each_entry(bo, &exec->unref_list, unref_head)
--		unref_list_count++;
--
--	state->bo_count = exec->bo_count + unref_list_count;
--	kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo),
--				   GFP_ATOMIC);
-+	/* Get the bos from both binner and renderer into hang state. */
-+	state->bo_count = 0;
-+	for (i = 0; i < 2; i++) {
-+		if (!exec[i])
-+			continue;
-+
-+		unref_list_count = 0;
-+		list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
-+			unref_list_count++;
-+		state->bo_count += exec[i]->bo_count + unref_list_count;
-+	}
-+
-+	kernel_state->bo = kcalloc(state->bo_count,
-+				   sizeof(*kernel_state->bo), GFP_ATOMIC);
-+
- 	if (!kernel_state->bo) {
- 		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
- 		return;
- 	}
- 
--	for (i = 0; i < exec->bo_count; i++) {
--		drm_gem_object_reference(&exec->bo[i]->base);
--		kernel_state->bo[i] = &exec->bo[i]->base;
--	}
-+	prev_idx = 0;
-+	for (i = 0; i < 2; i++) {
-+		if (!exec[i])
-+			continue;
-+
-+		for (j = 0; j < exec[i]->bo_count; j++) {
-+			drm_gem_object_reference(&exec[i]->bo[j]->base);
-+			kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base;
-+		}
- 
--	list_for_each_entry(bo, &exec->unref_list, unref_head) {
--		drm_gem_object_reference(&bo->base.base);
--		kernel_state->bo[i] = &bo->base.base;
--		i++;
-+		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
-+			drm_gem_object_reference(&bo->base.base);
-+			kernel_state->bo[j + prev_idx] = &bo->base.base;
-+			j++;
-+		}
-+		prev_idx = j + 1;
- 	}
- 
--	state->start_bin = exec->ct0ca;
--	state->start_render = exec->ct1ca;
-+	if (exec[0])
-+		state->start_bin = exec[0]->ct0ca;
-+	if (exec[1])
-+		state->start_render = exec[1]->ct1ca;
- 
- 	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
- 
-@@ -272,13 +290,15 @@ vc4_hangcheck_elapsed(unsigned long data
- 	struct vc4_dev *vc4 = to_vc4_dev(dev);
- 	uint32_t ct0ca, ct1ca;
- 	unsigned long irqflags;
--	struct vc4_exec_info *exec;
-+	struct vc4_exec_info *bin_exec, *render_exec;
- 
- 	spin_lock_irqsave(&vc4->job_lock, irqflags);
--	exec = vc4_first_job(vc4);
-+
-+	bin_exec = vc4_first_bin_job(vc4);
-+	render_exec = vc4_first_render_job(vc4);
- 
- 	/* If idle, we can stop watching for hangs. */
--	if (!exec) {
-+	if (!bin_exec && !render_exec) {
- 		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
- 		return;
- 	}
-@@ -289,9 +309,12 @@ vc4_hangcheck_elapsed(unsigned long data
- 	/* If we've made any progress in execution, rearm the timer
- 	 * and wait.
- 	 */
--	if (ct0ca != exec->last_ct0ca || ct1ca != exec->last_ct1ca) {
--		exec->last_ct0ca = ct0ca;
--		exec->last_ct1ca = ct1ca;
-+	if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
-+	    (render_exec && ct1ca != render_exec->last_ct1ca)) {
-+		if (bin_exec)
-+			bin_exec->last_ct0ca = ct0ca;
-+		if (render_exec)
-+			render_exec->last_ct1ca = ct1ca;
- 		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
- 		vc4_queue_hangcheck(dev);
- 		return;
-@@ -391,11 +414,13 @@ vc4_flush_caches(struct drm_device *dev)
-  * The job_lock should be held during this.
-  */
- void
--vc4_submit_next_job(struct drm_device *dev)
-+vc4_submit_next_bin_job(struct drm_device *dev)
- {
- 	struct vc4_dev *vc4 = to_vc4_dev(dev);
--	struct vc4_exec_info *exec = vc4_first_job(vc4);
-+	struct vc4_exec_info *exec;
- 
-+again:
-+	exec = vc4_first_bin_job(vc4);
- 	if (!exec)
- 		return;
- 
-@@ -405,11 +430,40 @@ vc4_submit_next_job(struct drm_device *d
- 	V3D_WRITE(V3D_BPOA, 0);
- 	V3D_WRITE(V3D_BPOS, 0);
- 
--	if (exec->ct0ca != exec->ct0ea)
-+	/* Either put the job in the binner if it uses the binner, or
-+	 * immediately move it to the to-be-rendered queue.
-+	 */
-+	if (exec->ct0ca != exec->ct0ea) {
- 		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
-+	} else {
-+		vc4_move_job_to_render(dev, exec);
-+		goto again;
-+	}
-+}
-+
-+void
-+vc4_submit_next_render_job(struct drm_device *dev)
-+{
-+	struct vc4_dev *vc4 = to_vc4_dev(dev);
-+	struct vc4_exec_info *exec = vc4_first_render_job(vc4);
-+
-+	if (!exec)
-+		return;
-+
- 	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
- }
- 
-+void
-+vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
-+{
-+	struct vc4_dev *vc4 = to_vc4_dev(dev);
-+	bool was_empty = list_empty(&vc4->render_job_list);
-+
-+	list_move_tail(&exec->head, &vc4->render_job_list);
-+	if (was_empty)
-+		vc4_submit_next_render_job(dev);
-+}
-+
- static void
- vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
- {
-@@ -448,14 +502,14 @@ vc4_queue_submit(struct drm_device *dev,
- 	exec->seqno = seqno;
- 	vc4_update_bo_seqnos(exec, seqno);
- 
--	list_add_tail(&exec->head, &vc4->job_list);
-+	list_add_tail(&exec->head, &vc4->bin_job_list);
- 
- 	/* If no job was executing, kick ours off.  Otherwise, it'll
--	 * get started when the previous job's frame done interrupt
-+	 * get started when the previous job's flush done interrupt
- 	 * occurs.
- 	 */
--	if (vc4_first_job(vc4) == exec) {
--		vc4_submit_next_job(dev);
-+	if (vc4_first_bin_job(vc4) == exec) {
-+		vc4_submit_next_bin_job(dev);
- 		vc4_queue_hangcheck(dev);
- 	}
- 
-@@ -849,7 +903,8 @@ vc4_gem_init(struct drm_device *dev)
- {
- 	struct vc4_dev *vc4 = to_vc4_dev(dev);
- 
--	INIT_LIST_HEAD(&vc4->job_list);
-+	INIT_LIST_HEAD(&vc4->bin_job_list);
-+	INIT_LIST_HEAD(&vc4->render_job_list);
- 	INIT_LIST_HEAD(&vc4->job_done_list);
- 	INIT_LIST_HEAD(&vc4->seqno_cb_list);
- 	spin_lock_init(&vc4->job_lock);
---- a/drivers/gpu/drm/vc4/vc4_irq.c
-+++ b/drivers/gpu/drm/vc4/vc4_irq.c
-@@ -30,6 +30,10 @@
-  * disables that specific interrupt, and 0s written are ignored
-  * (reading either one returns the set of enabled interrupts).
-  *
-+ * When we take a binning flush done interrupt, we need to submit the
-+ * next frame for binning and move the finished frame to the render
-+ * thread.
-+ *
-  * When we take a render frame interrupt, we need to wake the
-  * processes waiting for some frame to be done, and get the next frame
-  * submitted ASAP (so the hardware doesn't sit idle when there's work
-@@ -44,6 +48,7 @@
- #include "vc4_regs.h"
- 
- #define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \
-+			 V3D_INT_FLDONE | \
- 			 V3D_INT_FRDONE)
- 
- DECLARE_WAIT_QUEUE_HEAD(render_wait);
-@@ -77,7 +82,7 @@ vc4_overflow_mem_work(struct work_struct
- 		unsigned long irqflags;
- 
- 		spin_lock_irqsave(&vc4->job_lock, irqflags);
--		current_exec = vc4_first_job(vc4);
-+		current_exec = vc4_first_bin_job(vc4);
- 		if (current_exec) {
- 			vc4->overflow_mem->seqno = vc4->finished_seqno + 1;
- 			list_add_tail(&vc4->overflow_mem->unref_head,
-@@ -98,17 +103,43 @@ vc4_overflow_mem_work(struct work_struct
- }
- 
- static void
--vc4_irq_finish_job(struct drm_device *dev)
-+vc4_irq_finish_bin_job(struct drm_device *dev)
-+{
-+	struct vc4_dev *vc4 = to_vc4_dev(dev);
-+	struct vc4_exec_info *exec = vc4_first_bin_job(vc4);
-+
-+	if (!exec)
-+		return;
-+
-+	vc4_move_job_to_render(dev, exec);
-+	vc4_submit_next_bin_job(dev);
-+}
-+
-+static void
-+vc4_cancel_bin_job(struct drm_device *dev)
-+{
-+	struct vc4_dev *vc4 = to_vc4_dev(dev);
-+	struct vc4_exec_info *exec = vc4_first_bin_job(vc4);
-+
-+	if (!exec)
-+		return;
-+
-+	list_move_tail(&exec->head, &vc4->bin_job_list);
-+	vc4_submit_next_bin_job(dev);
-+}
-+
-+static void
-+vc4_irq_finish_render_job(struct drm_device *dev)
- {
- 	struct vc4_dev *vc4 = to_vc4_dev(dev);
--	struct vc4_exec_info *exec = vc4_first_job(vc4);
-+	struct vc4_exec_info *exec = vc4_first_render_job(vc4);
- 
- 	if (!exec)
- 		return;
- 
- 	vc4->finished_seqno++;
- 	list_move_tail(&exec->head, &vc4->job_done_list);
--	vc4_submit_next_job(dev);
-+	vc4_submit_next_render_job(dev);
- 
- 	wake_up_all(&vc4->job_wait_queue);
- 	schedule_work(&vc4->job_done_work);
-@@ -125,9 +156,10 @@ vc4_irq(int irq, void *arg)
- 	barrier();
- 	intctl = V3D_READ(V3D_INTCTL);
- 
--	/* Acknowledge the interrupts we're handling here. The render
--	 * frame done interrupt will be cleared, while OUTOMEM will
--	 * stay high until the underlying cause is cleared.
-+	/* Acknowledge the interrupts we're handling here. The binner
-+	 * last flush / render frame done interrupt will be cleared,
-+	 * while OUTOMEM will stay high until the underlying cause is
-+	 * cleared.
- 	 */
- 	V3D_WRITE(V3D_INTCTL, intctl);
- 
-@@ -138,9 +170,16 @@ vc4_irq(int irq, void *arg)
- 		status = IRQ_HANDLED;
- 	}
- 
-+	if (intctl & V3D_INT_FLDONE) {
-+		spin_lock(&vc4->job_lock);
-+		vc4_irq_finish_bin_job(dev);
-+		spin_unlock(&vc4->job_lock);
-+		status = IRQ_HANDLED;
-+	}
-+
- 	if (intctl & V3D_INT_FRDONE) {
- 		spin_lock(&vc4->job_lock);
--		vc4_irq_finish_job(dev);
-+		vc4_irq_finish_render_job(dev);
- 		spin_unlock(&vc4->job_lock);
- 		status = IRQ_HANDLED;
- 	}
-@@ -205,6 +244,7 @@ void vc4_irq_reset(struct drm_device *de
- 	V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
- 
- 	spin_lock_irqsave(&vc4->job_lock, irqflags);
--	vc4_irq_finish_job(dev);
-+	vc4_cancel_bin_job(dev);
-+	vc4_irq_finish_render_job(dev);
- 	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
- }