Skip to content

Commit

Permalink
drm/i915/execlists: Delay writing to ELSP until HW has processed the …
Browse files Browse the repository at this point in the history
…previous write

The hardware needs some time to process the information received in the
ExecList Submission Port, and expects us to not write anything more until
it has 'acknowledged' this new submission by sending an IDLE_ACTIVE or
PREEMPTED CSB event.

If we do not follow this, the driver could write new data into the ELSP
before HW had finishing fetching the previous one, putting us in
'undefined behaviour' space.

This seems to be the problem causing the spurious PREEMPTED & COMPLETE
events after a COMPLETE like the one below:

[] vcs0: sw rd pointer = 2, hw wr pointer = 0, current 'head' = 3.
[] vcs0:  Execlist CSB[0]: 0x00000018 _ 0x00000007
[] vcs0:  Execlist CSB[1]: 0x00000001 _ 0x00000000
[] vcs0:  Execlist CSB[2]: 0x00000018 _ 0x00000007  <<< COMPLETE
[] vcs0:  Execlist CSB[3]: 0x00000012 _ 0x00000007  <<< PREEMPTED & COMPLETE
[] vcs0:  Execlist CSB[4]: 0x00008002 _ 0x00000006
[] vcs0:  Execlist CSB[5]: 0x00000014 _ 0x00000006

The ELSP writes that lead to this CSB sequence show that the HW hadn't
started executing the previous execlist (the one with only ctx 0x6) by the
time the new one was submitted; this is a bit more clear in the data
show in the EXECLIST_STATUS register at the time of the ELSP write.

[] vcs0: ELSP[0] = 0x0_0        [execlist1] - status_reg = 0x0_302
[] vcs0: ELSP[1] = 0x6_fedb2119 [execlist0] - status_reg = 0x0_8302

[] vcs0: ELSP[2] = 0x7_fedaf119 [execlist1] - status_reg = 0x0_8308
[] vcs0: ELSP[3] = 0x6_fedb2119 [execlist0] - status_reg = 0x7_8308

Note that having to wait for this ack does not disable lite-restores,
although it may reduce their numbers.

v2: Rewrote Michel's patch, his digging and his fix, my spelling.
v3: Reorder to ack early to allow preemption

v4: backported to 4.14.20.(Dmitry/Owen)

Change-Id: I92c7cd3a92fbde79f0a02448810f227bd6e0b351
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102035
Suggested-by: Michel Thierry <[email protected]>
Signed-off-by: Chris Wilson <[email protected]>
Cc: Michel Thierry <[email protected]>
Reviewed-by: Michel Thierry <[email protected]>
  • Loading branch information
ickle authored and dvrogozh committed Sep 4, 2018
1 parent e61a9cd commit 334f843
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 1 deletion.
25 changes: 24 additions & 1 deletion drivers/gpu/drm/i915/intel_lrc.c
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
writel(upper_32_bits(desc), elsp);
writel(lower_32_bits(desc), elsp);
}
execlists_clear_active(engine, EXECLISTS_ACTIVE_HWACK);
}

static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
Expand Down Expand Up @@ -468,14 +469,26 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
bool submit = false;

last = port_request(port);
if (last)
if (last) {

/*
* If we write to ELSP a second time before the HW has had
* a chance to respond to the previous write, we can confuse
* the HW and hit "undefined behaviour". After writing to ELSP,
* we must then wait until we see a context-switch event from
* the HW to indicate that it has had a chance to respond.
*/
if (!execlists_is_active(engine, EXECLISTS_ACTIVE_HWACK))
return;

/* WaIdleLiteRestore:bdw,skl
* Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
* as we resubmit the request. See gen8_emit_breadcrumb()
* for where we prepare the padding after the end of the
* request.
*/
last->tail = last->wa_tail;
}

GEM_BUG_ON(port_isset(&port[1]));

Expand Down Expand Up @@ -656,6 +669,15 @@ static void intel_lrc_irq_handler(unsigned long data)
*/

status = readl(buf + 2 * head);

if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
GEN8_CTX_STATUS_PREEMPTED))
execlists_set_active(engine,
EXECLISTS_ACTIVE_HWACK);
if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
execlists_clear_active(engine,
EXECLISTS_ACTIVE_HWACK);

if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
continue;

Expand Down Expand Up @@ -1334,6 +1356,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);

engine->active = 0;
/* After a GPU reset, we may have requests to replay */
submit = false;
for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
Expand Down
25 changes: 25 additions & 0 deletions drivers/gpu/drm/i915/intel_ringbuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,10 @@ struct intel_engine_cs {
#define port_index(p, e) ((p) - (e)->execlist_port)
GEM_DEBUG_DECL(u32 context_id);
} execlist_port[2];

unsigned int active;
#define EXECLISTS_ACTIVE_HWACK 0

struct rb_root execlist_queue;
struct rb_node *execlist_first;
unsigned int fw_domains;
Expand Down Expand Up @@ -782,6 +786,27 @@ static inline bool intel_wait_complete(const struct intel_wait *wait)
return RB_EMPTY_NODE(&wait->node);
}

static inline void
execlists_set_active(struct intel_engine_cs *execlists,
unsigned int bit)
{
__set_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_active(struct intel_engine_cs *execlists,
unsigned int bit)
{
__clear_bit(bit, (unsigned long *)&execlists->active);
}

static inline bool
execlists_is_active(const struct intel_engine_cs *execlists,
unsigned int bit)
{
return test_bit(bit, (unsigned long *)&execlists->active);
}

bool intel_engine_add_wait(struct intel_engine_cs *engine,
struct intel_wait *wait);
void intel_engine_remove_wait(struct intel_engine_cs *engine,
Expand Down

0 comments on commit 334f843

Please sign in to comment.