diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h index 6267293..1836089 100644 --- a/src/mesa/drivers/dri/r200/r200_context.h +++ b/src/mesa/drivers/dri/r200/r200_context.h @@ -526,8 +526,6 @@ struct r200_state { struct r200_tcl_info { GLuint hw_primitive; - GLuint *Elts; - int elt_used; }; diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.h b/src/mesa/drivers/dri/r200/r200_ioctl.h index 2a4b8a1..f6419f5 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.h +++ b/src/mesa/drivers/dri/r200/r200_ioctl.h @@ -125,10 +125,12 @@ static INLINE int R200_DB_STATECHANGE( * are available, you will also be adding an rmesa->state.max_state_size because * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts. */ -#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int)) -#define VERT_AOS_BUFSZ (5 * sizeof(int)) +#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2) + nr*2)) +#define VERT_AOS_BUFSZ (5) #define ELTS_BUFSZ(nr) (12 + nr * 2) -#define VBUF_BUFSZ (3 * sizeof(int)) +#define VBUF_BUFSZ (3) +#define SCISSOR_BUFSZ (8) +#define INDEX_BUFSZ (8+2) static inline uint32_t cmdpacket3(int cmd_type) { diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c index 78ad5ba..0a4364b 100644 --- a/src/mesa/drivers/dri/r200/r200_state_init.c +++ b/src/mesa/drivers/dri/r200/r200_state_init.c @@ -226,57 +226,73 @@ static int cmdscl2( int offset, int stride, int count ) return h.i; } -#define CHECK( NM, FLAG ) \ +/** + * Check functions determine whether a state atom is active. + * If the atom is active, the check function returns its maximum emit size in dwords. + */ +#define CHECK( NM, FLAG, ADD ) \ static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \ { \ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ (void) rmesa; \ - return (FLAG) ? atom->cmd_size : 0; \ + return (FLAG) ? atom->cmd_size + (ADD) : 0; \ } -#define TCL_CHECK( NM, FLAG ) \ +#define TCL_CHECK( NM, FLAG, ADD ) \ static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \ { \ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ - return (!rmesa->radeon.TclFallback && !ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size : 0; \ + return (!rmesa->radeon.TclFallback && !ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size + ADD : 0; \ } -#define TCL_OR_VP_CHECK( NM, FLAG ) \ +#define TCL_OR_VP_CHECK( NM, FLAG, ADD ) \ static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \ { \ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ - return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size : 0; \ + return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size + ADD : 0; \ } -#define VP_CHECK( NM, FLAG ) \ +#define VP_CHECK( NM, FLAG, ADD ) \ static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \ { \ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ (void) atom; \ - return (!rmesa->radeon.TclFallback && ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size : 0; \ + return (!rmesa->radeon.TclFallback && ctx->VertexProgram._Enabled && (FLAG)) ?
atom->cmd_size + ADD : 0; \ } -CHECK( always, GL_TRUE ) -CHECK( never, GL_FALSE ) -CHECK( tex_any, ctx->Texture._EnabledUnits ) -CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled) ); -CHECK( tex_pair, (rmesa->state.texture.unit[atom->idx].unitneeded | rmesa->state.texture.unit[atom->idx & ~1].unitneeded) ) -CHECK( tex, rmesa->state.texture.unit[atom->idx].unitneeded ) -CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled ) - CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled) ) -CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)) ) -CHECK( afs, ctx->ATIFragmentShader._Enabled ) -CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT ) -TCL_CHECK( tcl_fog, ctx->Fog.Enabled ) -TCL_CHECK( tcl, GL_TRUE ) -TCL_CHECK( tcl_tex, rmesa->state.texture.unit[atom->idx].unitneeded ) -TCL_CHECK( tcl_lighting, ctx->Light.Enabled ) -TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled ) -TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))) ) -TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE ) -VP_CHECK( tcl_vp, GL_TRUE ) -VP_CHECK( tcl_vp_size, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64 ) -VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 96 ) +CHECK( always, GL_TRUE, 0 ) +CHECK( always_add4, GL_TRUE, 4 ) +CHECK( never, GL_FALSE, 0 ) +CHECK( tex_any, ctx->Texture._EnabledUnits, 0 ) +CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled), 0 ); +CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled, 0 ) + CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled), 0 ) +CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)), 0 ) +CHECK( afs, ctx->ATIFragmentShader._Enabled, 0 ) +CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 3 + 3*5 - CUBE_STATE_SIZE ) +CHECK( tex_cube_cs, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 2 + 2*5 - CUBE_STATE_SIZE ) +TCL_CHECK( tcl_fog, ctx->Fog.Enabled, 0 ) +TCL_CHECK( tcl_fog_add4, ctx->Fog.Enabled, 4 ) +TCL_CHECK( tcl, GL_TRUE, 0 ) +TCL_CHECK( tcl_add8, GL_TRUE, 8 ) +TCL_CHECK( tcl_add4, GL_TRUE, 4 ) +TCL_CHECK( tcl_tex, rmesa->state.texture.unit[atom->idx].unitneeded, 0 ) +TCL_CHECK( tcl_lighting, ctx->Light.Enabled, 0 ) +TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled, 0 ) +TCL_CHECK( tcl_tex_add4, rmesa->state.texture.unit[atom->idx].unitneeded, 4 ) +TCL_CHECK( tcl_lighting_add4, ctx->Light.Enabled, 4 ) +TCL_CHECK( tcl_lighting_add6, ctx->Light.Enabled, 6 ) +TCL_CHECK( tcl_light_add8, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled, 8 ) +TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))), 0 ) +TCL_OR_VP_CHECK( tcl_ucp_add4, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))), 4 ) +TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE, 0 ) +TCL_OR_VP_CHECK( tcl_or_vp_add2, GL_TRUE, 2 ) +VP_CHECK( tcl_vp, GL_TRUE, 0 ) +VP_CHECK( tcl_vp_add4, GL_TRUE, 4 ) +VP_CHECK( tcl_vp_size, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64, 0 ) +VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 96, 0 ) +VP_CHECK( tcl_vp_size_add4, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64, 4 ) +VP_CHECK( tcl_vpp_size_add4, ctx->VertexProgram.Current->Base.NumNativeParameters > 96, 4 ) #define 
OUT_VEC(hdr, data) do { \ drm_radeon_cmd_header_t h; \ @@ -329,9 +345,8 @@ static void mtl_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); - dwords += 6; BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_VEC(atom->cmd[MTL_CMD_0], (atom->cmd+1)); OUT_SCL2(atom->cmd[MTL_CMD_1], (atom->cmd + 18)); @@ -342,9 +357,8 @@ static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); - dwords += 8; BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1); OUT_VEC(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1); @@ -355,9 +369,8 @@ static void ptp_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); - dwords += 8; BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_VEC(atom->cmd[PTP_CMD_0], atom->cmd+1); OUT_VEC(atom->cmd[PTP_CMD_1], atom->cmd+PTP_CMD_1+1); @@ -368,9 +381,8 @@ static void veclinear_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); - dwords += 4; OUT_VECLINEAR(atom->cmd[0], atom->cmd+1); } @@ -378,9 +390,8 @@ static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); - dwords += 2; BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_SCL(atom->cmd[0], atom->cmd+1); END_BATCH(); @@ -391,9 +402,8 @@ static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); - dwords += 4; BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_VEC(atom->cmd[0], atom->cmd+1); END_BATCH(); @@ -406,10 +416,10 @@ static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom) struct radeon_renderbuffer *rrb; uint32_t cbpitch; uint32_t zbpitch, depth_fmt; - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); /* output the first 7 bytes of context */ - BEGIN_BATCH_NO_AUTOSTATE(dwords+2+2); + BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_BATCH_TABLE(atom->cmd, 5); rrb = radeon_get_depthbuffer(&r200->radeon); @@ -466,6 +476,31 @@ static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom) END_BATCH(); } +static int check_always_ctx( GLcontext *ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + struct radeon_renderbuffer *rrb, *drb; + uint32_t dwords; + + rrb = radeon_get_colorbuffer(&r200->radeon); + if (!rrb || !rrb->bo) { + return 0; + } + + drb = radeon_get_depthbuffer(&r200->radeon); + + dwords = 10; + if (drb) + dwords += 6; + if (rrb) + dwords += 8; + if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) + dwords += 4; + + + return dwords; +} + static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); @@ -473,7 +508,7 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) struct radeon_renderbuffer *rrb, *drb; uint32_t cbpitch = 0; uint32_t zbpitch = 0; - uint32_t dwords = 
atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); uint32_t depth_fmt; rrb = radeon_get_colorbuffer(&r200->radeon); @@ -511,14 +546,6 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt; } - dwords = 10; - if (drb) - dwords += 6; - if (rrb) - dwords += 8; - if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) - dwords += 4; - /* output the first 7 bytes of context */ BEGIN_BATCH_NO_AUTOSTATE(dwords); @@ -556,16 +583,45 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) END_BATCH(); } +static int get_tex_size(GLcontext* ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + uint32_t dwords = atom->cmd_size + 2; + int i = atom->idx; + radeonTexObj *t = r200->state.texture.unit[i].texobj; + if (!(t && t->mt && !t->image_override)) + dwords -= 2; + + return dwords; +} + +static int check_tex_pair(GLcontext* ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + if (!(r200->state.texture.unit[atom->idx].unitneeded | r200->state.texture.unit[atom->idx & ~1].unitneeded)) + return 0; + + return get_tex_size(ctx, atom); +} + +static int check_tex(GLcontext* ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + if (!(r200->state.texture.unit[atom->idx].unitneeded)) + return 0; + + return get_tex_size(ctx, atom); +} + + static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->check(ctx, atom); int i = atom->idx; radeonTexObj *t = r200->state.texture.unit[i].texobj; - if (t && t->mt && !t->image_override) - dwords += 2; BEGIN_BATCH_NO_AUTOSTATE(dwords); /* is this ok even with drm older than 1.18? 
*/ OUT_BATCH_TABLE(atom->cmd, 10); @@ -583,17 +639,13 @@ static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom) END_BATCH(); } -static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) +static int get_tex_cs_size(GLcontext* ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); - BATCH_LOCALS(&r200->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = atom->cmd_size + 2; + int hastexture = 1; int i = atom->idx; radeonTexObj *t = r200->state.texture.unit[i].texobj; - int hastexture = 1; - - if (!r200->state.texture.unit[i].unitneeded) - hastexture = 0; if (!t) hastexture = 0; else { @@ -601,16 +653,45 @@ static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) hastexture = 0; } - if (hastexture) - dwords += 2; - else - dwords -= 2; + if (!hastexture) + dwords -= 4; + return dwords; +} + +static int check_tex_pair_cs(GLcontext* ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + if (!(r200->state.texture.unit[atom->idx].unitneeded | r200->state.texture.unit[atom->idx & ~1].unitneeded)) + return 0; + + return get_tex_cs_size(ctx, atom); +} + +static int check_tex_cs(GLcontext* ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + if (!(r200->state.texture.unit[atom->idx].unitneeded)) + return 0; + + return get_tex_cs_size(ctx, atom); +} + + +static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + BATCH_LOCALS(&r200->radeon); + uint32_t dwords = atom->check(ctx, atom); + int i = atom->idx; + radeonTexObj *t = r200->state.texture.unit[i].texobj; + if (!r200->state.texture.unit[i].unitneeded) + dwords -= 4; BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_BATCH(CP_PACKET0(R200_PP_TXFILTER_0 + (32 * i), 7)); OUT_BATCH_TABLE((atom->cmd + 1), 8); - if (hastexture) { + if (dwords > atom->cmd_size) { OUT_BATCH(CP_PACKET0(R200_PP_TXOFFSET_0 + (24 * i), 0)); if (t->mt && !t->image_override) { OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, @@ -629,12 +710,15 @@ static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = 3; + uint32_t dwords = atom->check(ctx, atom); int i = atom->idx, j; radeonTexObj *t = r200->state.texture.unit[i].texobj; radeon_mipmap_level *lvl; - BEGIN_BATCH_NO_AUTOSTATE(dwords + (3 * 5)); + if (!(t && !t->image_override)) + dwords = 2; + + BEGIN_BATCH_NO_AUTOSTATE(dwords); /* XXX that size won't really match with image_override... 
*/ OUT_BATCH_TABLE(atom->cmd, 2); @@ -653,12 +737,14 @@ static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = 2; + uint32_t dwords = atom->check(ctx, atom); int i = atom->idx, j; radeonTexObj *t = r200->state.texture.unit[i].texobj; radeon_mipmap_level *lvl; + if (!(t && !t->image_override)) + dwords = 2; - BEGIN_BATCH_NO_AUTOSTATE(dwords + (4 * 5)); + BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_BATCH_TABLE(atom->cmd, 2); if (t && !t->image_override) { @@ -713,14 +799,19 @@ void r200InitState( r200ContextPtr rmesa ) /* Allocate state buffers: */ if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) - ALLOC_STATE( ctx, always, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 ); + ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 ); else - ALLOC_STATE( ctx, always, CTX_STATE_SIZE_OLDDRM, "CTX/context", 0 ); + ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE_OLDDRM, "CTX/context", 0 ); if (rmesa->radeon.radeonScreen->kernel_mm) + { rmesa->hw.ctx.emit = ctx_emit_cs; + rmesa->hw.ctx.check = check_always_ctx; + } else + { rmesa->hw.ctx.emit = ctx_emit; + } ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 ); ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 ); ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 ); @@ -732,44 +823,51 @@ void r200InitState( r200ContextPtr rmesa ) ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 ); ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 ); ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 ); - if (rmesa->radeon.radeonScreen->drmSupportsFragShader) { - if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) { - /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */ - ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 ); - ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 ); - ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 ); + { + int state_size = TEX_STATE_SIZE_NEWDRM; + if (!rmesa->radeon.radeonScreen->drmSupportsFragShader) { + state_size = TEX_STATE_SIZE_OLDDRM; } - else { - ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 ); - ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 ); - ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 ); - } - ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-2", 2 ); - ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-3", 3 ); - ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-4", 4 ); - ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_NEWDRM, "TEX/tex-5", 5 ); - ALLOC_STATE( atf, afs, ATF_STATE_SIZE, "ATF/tfactor", 0 ); - ALLOC_STATE( afs[0], afs_pass1, AFS_STATE_SIZE, "AFS/afsinst-0", 0 ); - ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 ); - } - else { - if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) { - ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 ); - ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 ); - ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 ); - } - else { - ALLOC_STATE( tex[0], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 ); - ALLOC_STATE( tex[1], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 ); - ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 ); + if (rmesa->radeon.radeonScreen->drmSupportsFragShader) { + if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) { + /* make sure texture units 0/1 are emitted pair-wise for r200 t0 
hang workaround */ + ALLOC_STATE( tex[0], tex_pair_cs, state_size, "TEX/tex-0", 0 ); + ALLOC_STATE( tex[1], tex_pair_cs, state_size, "TEX/tex-1", 1 ); + ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 ); + } + else { + ALLOC_STATE( tex[0], tex_cs, state_size, "TEX/tex-0", 0 ); + ALLOC_STATE( tex[1], tex_cs, state_size, "TEX/tex-1", 1 ); + ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 ); + } + ALLOC_STATE( tex[2], tex_cs, state_size, "TEX/tex-2", 2 ); + ALLOC_STATE( tex[3], tex_cs, state_size, "TEX/tex-3", 3 ); + ALLOC_STATE( tex[4], tex_cs, state_size, "TEX/tex-4", 4 ); + ALLOC_STATE( tex[5], tex_cs, state_size, "TEX/tex-5", 5 ); + if (!rmesa->radeon.radeonScreen->kernel_mm) + { + if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) { + rmesa->hw.tex[0].check = check_tex_pair; + rmesa->hw.tex[1].check = check_tex_pair; + } else { + rmesa->hw.tex[0].check = check_tex; + rmesa->hw.tex[1].check = check_tex; + } + rmesa->hw.tex[2].check = check_tex; + rmesa->hw.tex[3].check = check_tex; + rmesa->hw.tex[4].check = check_tex; + rmesa->hw.tex[5].check = check_tex; + } + if (rmesa->radeon.radeonScreen->drmSupportsFragShader) { + ALLOC_STATE( atf, afs, ATF_STATE_SIZE, "ATF/tfactor", 0 ); + ALLOC_STATE( afs[0], afs_pass1, AFS_STATE_SIZE, "AFS/afsinst-0", 0 ); + ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 ); + } else { + ALLOC_STATE( atf, never, ATF_STATE_SIZE, "ATF/tfactor", 0 ); + ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 ); + ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 ); + } } - ALLOC_STATE( tex[2], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-2", 2 ); - ALLOC_STATE( tex[3], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-3", 3 ); - ALLOC_STATE( tex[4], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-4", 4 ); - ALLOC_STATE( tex[5], tex, TEX_STATE_SIZE_OLDDRM, "TEX/tex-5", 5 ); - ALLOC_STATE( atf, never, ATF_STATE_SIZE, "TF/tfactor", 0 ); - ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 ); - ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 ); } for (i = 0; i < 6; i++) @@ -785,9 +883,10 @@ void r200InitState( r200ContextPtr rmesa ) ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 ); ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 ); for (i = 0; i < 6; i++) - if (rmesa->radeon.radeonScreen->kernel_mm) + if (rmesa->radeon.radeonScreen->kernel_mm) { rmesa->hw.cube[i].emit = cube_emit_cs; - else + rmesa->hw.cube[i].check = check_tex_cube_cs; + } else rmesa->hw.cube[i].emit = cube_emit; } else { @@ -801,10 +900,17 @@ void r200InitState( r200ContextPtr rmesa ) if (rmesa->radeon.radeonScreen->drmSupportsVertexProgram) { ALLOC_STATE( pvs, tcl_vp, PVS_STATE_SIZE, "PVS/pvscntl", 0 ); - ALLOC_STATE( vpi[0], tcl_vp, VPI_STATE_SIZE, "VP/vertexprog-0", 0 ); - ALLOC_STATE( vpi[1], tcl_vp_size, VPI_STATE_SIZE, "VP/vertexprog-1", 1 ); - ALLOC_STATE( vpp[0], tcl_vp, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 ); - ALLOC_STATE( vpp[1], tcl_vpp_size, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 ); + if (rmesa->radeon.radeonScreen->kernel_mm) { + ALLOC_STATE( vpi[0], tcl_vp_add4, VPI_STATE_SIZE, "VP/vertexprog-0", 0 ); + ALLOC_STATE( vpi[1], tcl_vp_size_add4, VPI_STATE_SIZE, "VP/vertexprog-1", 1 ); + ALLOC_STATE( vpp[0], tcl_vp_add4, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 ); + ALLOC_STATE( vpp[1], tcl_vpp_size_add4, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 ); + } else { + ALLOC_STATE( vpi[0], tcl_vp, VPI_STATE_SIZE, "VP/vertexprog-0", 0 ); + ALLOC_STATE( vpi[1], tcl_vp_size, VPI_STATE_SIZE, "VP/vertexprog-1", 1 ); + 
ALLOC_STATE( vpp[0], tcl_vp, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 ); + ALLOC_STATE( vpp[1], tcl_vpp_size, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 ); + } } else { ALLOC_STATE( pvs, never, PVS_STATE_SIZE, "PVS/pvscntl", 0 ); @@ -817,35 +923,67 @@ void r200InitState( r200ContextPtr rmesa ) ALLOC_STATE( tcl, tcl_or_vp, TCL_STATE_SIZE, "TCL/tcl", 0 ); ALLOC_STATE( msl, tcl, MSL_STATE_SIZE, "MSL/matrix-select", 0 ); ALLOC_STATE( tcg, tcl, TCG_STATE_SIZE, "TCG/texcoordgen", 0 ); - ALLOC_STATE( mtl[0], tcl_lighting, MTL_STATE_SIZE, "MTL0/material0", 0 ); - ALLOC_STATE( mtl[1], tcl_lighting, MTL_STATE_SIZE, "MTL1/material1", 1 ); - ALLOC_STATE( grd, tcl_or_vp, GRD_STATE_SIZE, "GRD/guard-band", 0 ); - ALLOC_STATE( fog, tcl_fog, FOG_STATE_SIZE, "FOG/fog", 0 ); - ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 0 ); - ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 0 ); - ALLOC_STATE( mat[R200_MTX_MV], tcl, MAT_STATE_SIZE, "MAT/modelview", 0 ); - ALLOC_STATE( mat[R200_MTX_IMV], tcl, MAT_STATE_SIZE, "MAT/it-modelview", 0 ); - ALLOC_STATE( mat[R200_MTX_MVP], tcl, MAT_STATE_SIZE, "MAT/modelproject", 0 ); - ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex, MAT_STATE_SIZE, "MAT/texmat0", 0 ); - ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex, MAT_STATE_SIZE, "MAT/texmat1", 1 ); - ALLOC_STATE( mat[R200_MTX_TEX2], tcl_tex, MAT_STATE_SIZE, "MAT/texmat2", 2 ); - ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex, MAT_STATE_SIZE, "MAT/texmat3", 3 ); - ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex, MAT_STATE_SIZE, "MAT/texmat4", 4 ); - ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex, MAT_STATE_SIZE, "MAT/texmat5", 5 ); - ALLOC_STATE( ucp[0], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-0", 0 ); - ALLOC_STATE( ucp[1], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-1", 1 ); - ALLOC_STATE( ucp[2], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-2", 2 ); - ALLOC_STATE( ucp[3], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-3", 3 ); - ALLOC_STATE( ucp[4], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-4", 4 ); - ALLOC_STATE( ucp[5], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-5", 5 ); - ALLOC_STATE( lit[0], tcl_light, LIT_STATE_SIZE, "LIT/light-0", 0 ); - ALLOC_STATE( lit[1], tcl_light, LIT_STATE_SIZE, "LIT/light-1", 1 ); - ALLOC_STATE( lit[2], tcl_light, LIT_STATE_SIZE, "LIT/light-2", 2 ); - ALLOC_STATE( lit[3], tcl_light, LIT_STATE_SIZE, "LIT/light-3", 3 ); - ALLOC_STATE( lit[4], tcl_light, LIT_STATE_SIZE, "LIT/light-4", 4 ); - ALLOC_STATE( lit[5], tcl_light, LIT_STATE_SIZE, "LIT/light-5", 5 ); - ALLOC_STATE( lit[6], tcl_light, LIT_STATE_SIZE, "LIT/light-6", 6 ); - ALLOC_STATE( lit[7], tcl_light, LIT_STATE_SIZE, "LIT/light-7", 7 ); + if (rmesa->radeon.radeonScreen->kernel_mm) { + ALLOC_STATE( mtl[0], tcl_lighting_add6, MTL_STATE_SIZE, "MTL0/material0", 0 ); + ALLOC_STATE( mtl[1], tcl_lighting_add6, MTL_STATE_SIZE, "MTL1/material1", 1 ); + ALLOC_STATE( grd, tcl_or_vp_add2, GRD_STATE_SIZE, "GRD/guard-band", 0 ); + ALLOC_STATE( fog, tcl_fog_add4, FOG_STATE_SIZE, "FOG/fog", 0 ); + ALLOC_STATE( glt, tcl_lighting_add4, GLT_STATE_SIZE, "GLT/light-global", 0 ); + ALLOC_STATE( eye, tcl_lighting_add4, EYE_STATE_SIZE, "EYE/eye-vector", 0 ); + ALLOC_STATE( mat[R200_MTX_MV], tcl_add4, MAT_STATE_SIZE, "MAT/modelview", 0 ); + ALLOC_STATE( mat[R200_MTX_IMV], tcl_add4, MAT_STATE_SIZE, "MAT/it-modelview", 0 ); + ALLOC_STATE( mat[R200_MTX_MVP], tcl_add4, MAT_STATE_SIZE, "MAT/modelproject", 0 ); + ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat0", 0 ); + ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat1", 1 ); + ALLOC_STATE( 
mat[R200_MTX_TEX2], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat2", 2 ); + ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat3", 3 ); + ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat4", 4 ); + ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat5", 5 ); + ALLOC_STATE( ucp[0], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-0", 0 ); + ALLOC_STATE( ucp[1], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-1", 1 ); + ALLOC_STATE( ucp[2], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-2", 2 ); + ALLOC_STATE( ucp[3], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-3", 3 ); + ALLOC_STATE( ucp[4], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-4", 4 ); + ALLOC_STATE( ucp[5], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-5", 5 ); + ALLOC_STATE( lit[0], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-0", 0 ); + ALLOC_STATE( lit[1], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-1", 1 ); + ALLOC_STATE( lit[2], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-2", 2 ); + ALLOC_STATE( lit[3], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-3", 3 ); + ALLOC_STATE( lit[4], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-4", 4 ); + ALLOC_STATE( lit[5], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-5", 5 ); + ALLOC_STATE( lit[6], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-6", 6 ); + ALLOC_STATE( lit[7], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-7", 7 ); + } else { + ALLOC_STATE( mtl[0], tcl_lighting, MTL_STATE_SIZE, "MTL0/material0", 0 ); + ALLOC_STATE( mtl[1], tcl_lighting, MTL_STATE_SIZE, "MTL1/material1", 1 ); + ALLOC_STATE( grd, tcl_or_vp, GRD_STATE_SIZE, "GRD/guard-band", 0 ); + ALLOC_STATE( fog, tcl_fog, FOG_STATE_SIZE, "FOG/fog", 0 ); + ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 0 ); + ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 0 ); + ALLOC_STATE( mat[R200_MTX_MV], tcl, MAT_STATE_SIZE, "MAT/modelview", 0 ); + ALLOC_STATE( mat[R200_MTX_IMV], tcl, MAT_STATE_SIZE, "MAT/it-modelview", 0 ); + ALLOC_STATE( mat[R200_MTX_MVP], tcl, MAT_STATE_SIZE, "MAT/modelproject", 0 ); + ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex, MAT_STATE_SIZE, "MAT/texmat0", 0 ); + ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex, MAT_STATE_SIZE, "MAT/texmat1", 1 ); + ALLOC_STATE( mat[R200_MTX_TEX2], tcl_tex, MAT_STATE_SIZE, "MAT/texmat2", 2 ); + ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex, MAT_STATE_SIZE, "MAT/texmat3", 3 ); + ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex, MAT_STATE_SIZE, "MAT/texmat4", 4 ); + ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex, MAT_STATE_SIZE, "MAT/texmat5", 5 ); + ALLOC_STATE( ucp[0], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-0", 0 ); + ALLOC_STATE( ucp[1], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-1", 1 ); + ALLOC_STATE( ucp[2], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-2", 2 ); + ALLOC_STATE( ucp[3], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-3", 3 ); + ALLOC_STATE( ucp[4], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-4", 4 ); + ALLOC_STATE( ucp[5], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-5", 5 ); + ALLOC_STATE( lit[0], tcl_light, LIT_STATE_SIZE, "LIT/light-0", 0 ); + ALLOC_STATE( lit[1], tcl_light, LIT_STATE_SIZE, "LIT/light-1", 1 ); + ALLOC_STATE( lit[2], tcl_light, LIT_STATE_SIZE, "LIT/light-2", 2 ); + ALLOC_STATE( lit[3], tcl_light, LIT_STATE_SIZE, "LIT/light-3", 3 ); + ALLOC_STATE( lit[4], tcl_light, LIT_STATE_SIZE, "LIT/light-4", 4 ); + ALLOC_STATE( lit[5], tcl_light, LIT_STATE_SIZE, "LIT/light-5", 5 ); + ALLOC_STATE( lit[6], tcl_light, LIT_STATE_SIZE, "LIT/light-6", 6 ); + ALLOC_STATE( lit[7], tcl_light, LIT_STATE_SIZE, "LIT/light-7", 7 ); + } ALLOC_STATE( pix[0], 
pix_zero, PIX_STATE_SIZE, "PIX/pixstage-0", 0 ); ALLOC_STATE( pix[1], texenv, PIX_STATE_SIZE, "PIX/pixstage-1", 1 ); ALLOC_STATE( pix[2], texenv, PIX_STATE_SIZE, "PIX/pixstage-2", 2 ); @@ -860,7 +998,7 @@ void r200InitState( r200ContextPtr rmesa ) } if (rmesa->radeon.radeonScreen->drmSupportsPointSprites) { ALLOC_STATE( spr, always, SPR_STATE_SIZE, "SPR/pointsprite", 0 ); - ALLOC_STATE( ptp, tcl, PTP_STATE_SIZE, "PTP/pointparams", 0 ); + ALLOC_STATE( ptp, tcl_add8, PTP_STATE_SIZE, "PTP/pointparams", 0 ); } else { ALLOC_STATE (spr, never, SPR_STATE_SIZE, "SPR/pointsprite", 0 ); @@ -953,18 +1091,18 @@ void r200InitState( r200ContextPtr rmesa ) rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TRI_PERF_CNTL); rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_POINT_SPRITE_CNTL); if (rmesa->radeon.radeonScreen->kernel_mm) { - rmesa->hw.mtl[0].emit = mtl_emit; - rmesa->hw.mtl[1].emit = mtl_emit; - - rmesa->hw.vpi[0].emit = veclinear_emit; - rmesa->hw.vpi[1].emit = veclinear_emit; - rmesa->hw.vpp[0].emit = veclinear_emit; - rmesa->hw.vpp[1].emit = veclinear_emit; - - rmesa->hw.grd.emit = scl_emit; - rmesa->hw.fog.emit = vec_emit; - rmesa->hw.glt.emit = vec_emit; - rmesa->hw.eye.emit = vec_emit; + rmesa->hw.mtl[0].emit = mtl_emit; + rmesa->hw.mtl[1].emit = mtl_emit; + + rmesa->hw.vpi[0].emit = veclinear_emit; + rmesa->hw.vpi[1].emit = veclinear_emit; + rmesa->hw.vpp[0].emit = veclinear_emit; + rmesa->hw.vpp[1].emit = veclinear_emit; + + rmesa->hw.grd.emit = scl_emit; + rmesa->hw.fog.emit = vec_emit; + rmesa->hw.glt.emit = vec_emit; + rmesa->hw.eye.emit = vec_emit; for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++) rmesa->hw.mat[i].emit = vec_emit; diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c index 83e70b5..f4af1b1 100644 --- a/src/mesa/drivers/dri/r200/r200_swtcl.c +++ b/src/mesa/drivers/dri/r200/r200_swtcl.c @@ -268,7 +268,7 @@ void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset) { r200ContextPtr rmesa = R200_CONTEXT(ctx); rcommonEnsureCmdBufSpace(&rmesa->radeon, - rmesa->radeon.hw.max_state_size + (12*sizeof(int)), + radeonCountEmitSize(&rmesa->radeon) + (12*sizeof(int)), __FUNCTION__); diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c index 5803709..ecfb6fc 100644 --- a/src/mesa/drivers/dri/r200/r200_tcl.c +++ b/src/mesa/drivers/dri/r200/r200_tcl.c @@ -109,7 +109,7 @@ static GLboolean discrete_prim[0x10] = { #define ELT_INIT(prim, hw_prim) \ r200TclPrimitive( ctx, prim, hw_prim | R200_VF_PRIM_WALK_IND ) -#define GET_MESA_ELTS() rmesa->tcl.Elts +#define GET_MESA_ELTS() TNL_CONTEXT(ctx)->vb.Elts /* Don't really know how many elts will fit in what's left of cmdbuf, @@ -156,8 +156,6 @@ static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr ) if (rmesa->radeon.dma.flush) rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); - rcommonEnsureCmdBufSpace(&rmesa->radeon, AOS_BUFSZ(rmesa->radeon.tcl.aos_count), __FUNCTION__); - r200EmitAOS( rmesa, rmesa->radeon.tcl.aos_count, 0 ); @@ -187,9 +185,6 @@ static void r200EmitPrim( GLcontext *ctx, r200TclPrimitive( ctx, prim, hwprim ); // fprintf(stderr,"Emit prim %d\n", rmesa->radeon.tcl.aos_count); - rcommonEnsureCmdBufSpace( &rmesa->radeon, - AOS_BUFSZ(rmesa->radeon.tcl.aos_count) + - rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ ); r200EmitAOS( rmesa, rmesa->radeon.tcl.aos_count, @@ -206,6 +201,7 @@ static void r200EmitPrim( GLcontext *ctx, r200EmitPrim( ctx, prim, hwprim, start, count ); \ (void) rmesa; } while (0) +#define 
MAX_CONVERSION_SIZE 40 /* Try & join small primitives */ #if 0 @@ -368,6 +364,58 @@ r200ComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord ) } } +/** + * Predict the total emit size for the next rendering operation, so that no flush happens in the middle of rendering. + * The prediction should be as tight as possible while never being smaller than the worst case. + */ +static void r200EnsureEmitSize( GLcontext * ctx , GLubyte* vimap_rev ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint space_required; + GLuint nr_aos = 0; + int i; + /* predict number of aos to emit */ + for (i = 0; i < 15; ++i) + { + if (vimap_rev[i] != 255) + { + ++nr_aos; + } + } + + { + /* count the prediction for state size */ + space_required = radeonCountEmitSize( &rmesa->radeon ); + /* vtx may be changed in r200EmitArrays so account for it if not dirty */ + if (!rmesa->hw.vtx.dirty) + space_required += rmesa->hw.vtx.check(rmesa->radeon.glCtx, &rmesa->hw.vtx); + /* predict size for elements */ + for (i = 0; i < VB->PrimitiveCount; ++i) + { + if (!VB->Primitive[i].count) + continue; + /* If primitive.count is less than MAX_CONVERSION_SIZE, the + rendering code may decide to convert the primitive to elts. + In that case we have to make a pessimistic prediction + and use the larger of the two paths. */ + const GLuint elts = ELTS_BUFSZ(nr_aos); + const GLuint index = INDEX_BUFSZ; + const GLuint vbuf = VBUF_BUFSZ; + if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE) + || vbuf > index + elts) + space_required += vbuf; + else + space_required += index + elts; + space_required += AOS_BUFSZ(nr_aos); + } + space_required += SCISSOR_BUFSZ; + } + /* flush the buffer in case we need more than is left. */ + rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__); +} + /**********************************************************************/ /* Render pipeline stage */ @@ -482,10 +530,9 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, /* Do the actual work: */ radeonReleaseArrays( ctx, ~0 /* stage->changed_inputs */ ); + r200EnsureEmitSize( ctx, vimap_rev ); r200EmitArrays( ctx, vimap_rev ); - rmesa->tcl.Elts = VB->Elts; - for (i = 0 ; i < VB->PrimitiveCount ; i++) { GLuint prim = _tnl_translate_prim(&VB->Primitive[i]); @@ -495,7 +542,7 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, if (!length) continue; - if (rmesa->tcl.Elts) + if (VB->Elts) r200EmitEltPrimitive( ctx, start, start+length, prim ); else r200EmitPrimitive( ctx, start, start+length, prim ); diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 330c2c8..236ce13 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -925,10 +925,10 @@ static void radeon_print_state_atom_kmm(radeonContextPtr radeon, struct radeon_s int dwords = (*state->check) (radeon->glCtx, state); uint32_t packet0; - fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, state->cmd_size); + fprintf(stderr, " emit %s %d/%d\n", state->name, state->cmd_size, dwords); if (RADEON_DEBUG & DEBUG_VERBOSE) { - for (i = 0; i < dwords;) { + for (i = 0; i < state->cmd_size;) { packet0 = state->cmd[i]; reg = (packet0 & 0x1FFF) << 2; count = ((packet0 & 0x3FFF0000) >> 16) + 1; @@ -945,6 +945,27 @@ static void radeon_print_state_atom_kmm(radeonContextPtr radeon, struct radeon_s } } +/** + * Count the total number of dwords for the next state emit.
+ **/ +GLuint radeonCountEmitSize(radeonContextPtr radeon) +{ + struct radeon_state_atom *atom; + int dwords = 0; + /* check if we are going to emit full state */ + if (!radeon->cmdbuf.cs->cdw) { + foreach(atom, &radeon->hw.atomlist) { + dwords += atom->check(radeon->glCtx, atom); + } + } else { + foreach(atom, &radeon->hw.atomlist) { + if (atom->dirty) + dwords += atom->check(radeon->glCtx, atom); + } + } + return dwords; +} + static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean dirty) { BATCH_LOCALS(radeon); @@ -1008,12 +1029,6 @@ void radeonEmitState(radeonContextPtr radeon) if (radeon->cmdbuf.cs->cdw && !radeon->hw.is_dirty && !radeon->hw.all_dirty) return; - /* To avoid going across the entire set of states multiple times, just check - * for enough space for the case of emitting all state, and inline the - * radeonAllocCmdBuf code here without all the checks. - */ - rcommonEnsureCmdBufSpace(radeon, radeon->hw.max_state_size, __FUNCTION__); - if (!radeon->cmdbuf.cs->cdw) { if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "Begin reemit state\n"); @@ -1168,10 +1183,12 @@ int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller) */ void rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller) { - if ((rmesa->cmdbuf.cs->cdw + dwords + 128) > rmesa->cmdbuf.size || - radeon_cs_need_flush(rmesa->cmdbuf.cs)) { - rcommonFlushCmdBuf(rmesa, caller); - } + if ((rmesa->cmdbuf.cs->cdw + dwords + 128) > rmesa->cmdbuf.size + || radeon_cs_need_flush(rmesa->cmdbuf.cs)) { + /* If flushing an empty buffer would be required, the pending rendering operation is too big for the command buffer. */ + assert(rmesa->cmdbuf.cs->cdw); + rcommonFlushCmdBuf(rmesa, caller); + } } void rcommonInitCmdBuf(radeonContextPtr rmesa) @@ -1246,7 +1263,6 @@ void rcommonBeginBatch(radeonContextPtr rmesa, int n, const char *function, int line) { - rcommonEnsureCmdBufSpace(rmesa, n, function); if (!rmesa->cmdbuf.cs->cdw && dostate) { if (RADEON_DEBUG & DEBUG_IOCTL) fprintf(stderr, "Reemit state after flush (from %s)\n", function); diff --git a/src/mesa/drivers/dri/radeon/radeon_common.h b/src/mesa/drivers/dri/radeon/radeon_common.h index cebae18..6e81100 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.h +++ b/src/mesa/drivers/dri/radeon/radeon_common.h @@ -24,6 +24,7 @@ void radeonUpdatePageFlipping(radeonContextPtr rmesa); void radeonFlush(GLcontext *ctx); void radeonFinish(GLcontext * ctx); void radeonEmitState(radeonContextPtr radeon); +GLuint radeonCountEmitSize(radeonContextPtr radeon); void radeon_clear_tris(GLcontext *ctx, GLbitfield mask);
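The sizing scheme in this patch can be summarized as follows: every state atom's check() callback reports the worst-case number of dwords that atom will emit (0 when inactive), and radeonCountEmitSize() sums those values over all atoms when the command buffer is empty (full state re-emit) or over only the dirty atoms otherwise. The sketch below is a minimal, self-contained illustration of that pattern, not driver code: struct demo_atom, demo_check_always_add4, demo_count_emit_size and the 4-dword header overhead are invented names and values for the example only.

#include <stddef.h>

/* Hypothetical stand-in for struct radeon_state_atom: a payload size plus a
 * check() callback that reports the worst-case number of dwords the atom
 * will emit (0 when the atom is inactive). */
struct demo_atom {
    int cmd_size;                              /* payload dwords */
    int dirty;                                 /* needs re-emission */
    int (*check)(const struct demo_atom *a);   /* worst-case dwords */
};

/* Example check(): an active atom emits its payload plus a fixed 4-dword
 * packet-header overhead, mirroring the "_add4" CHECK variants above. */
static int demo_check_always_add4(const struct demo_atom *a)
{
    return a->cmd_size + 4;
}

/* Mirrors the shape of radeonCountEmitSize(): with an empty command buffer
 * the whole state set is re-emitted, otherwise only dirty atoms count. */
static int demo_count_emit_size(const struct demo_atom *atoms, size_t n,
                                int cmdbuf_used_dwords)
{
    int dwords = 0;
    size_t i;

    for (i = 0; i < n; ++i) {
        if (cmdbuf_used_dwords == 0 || atoms[i].dirty)
            dwords += atoms[i].check(&atoms[i]);
    }
    return dwords;
}

The point of returning the worst case rather than the exact size is that the total can be reserved with rcommonEnsureCmdBufSpace() before any rendering commands are written, so the command stream is never flushed between the state emit and the primitives that depend on it.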
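r200EnsureEmitSize() cannot know in advance whether a small primitive will later be converted to an indexed (elt) draw, so it reserves the larger of the two possible command paths. The following is a hedged sketch of that per-primitive decision under the assumption that the caller passes the dword costs in directly (in the driver they come from VBUF_BUFSZ, INDEX_BUFSZ and ELTS_BUFSZ); demo_reserve_for_primitive and its parameter names are illustrative only.

/* Illustrative only: pick the pessimistic per-primitive reservation.
 * vbuf_cost - dwords needed for the vertex-buffer (non-indexed) path
 * elt_cost  - dwords needed for the indexed path (index packet + elts)
 * count     - number of vertices in the primitive
 * have_elts - nonzero if the vertex buffer already carries user indices
 * limit     - conversion threshold (MAX_CONVERSION_SIZE in the patch) */
static unsigned demo_reserve_for_primitive(unsigned vbuf_cost, unsigned elt_cost,
                                           unsigned count, int have_elts,
                                           unsigned limit)
{
    /* Large non-indexed primitives stay on the vertex-buffer path; otherwise
     * the indexed path may be chosen, so reserve whichever costs more. */
    if ((!have_elts && count >= limit) || vbuf_cost > elt_cost)
        return vbuf_cost;
    return elt_cost;
}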