This commit is contained in:
nephacks
2025-06-04 03:22:50 +02:00
parent f234f23848
commit f12416cffd
14243 changed files with 6446499 additions and 26 deletions

View File

@ -0,0 +1,28 @@
//========== Copyright (c) Valve Corporation, All rights reserved. ========
#ifndef VJOBS_ACCUMPOSE_SHARED_HDR
#define VJOBS_ACCUMPOSE_SHARED_HDR
#include "ps3/spu_job_shared.h"
struct PS3BoneJobData;
// Descriptor and parameter definitions for the accumpose job.
namespace job_accumpose
{
	// The job is carried in a 128-byte SPURS job descriptor.
	typedef CellSpursJob128 JobDescriptor_t;

	// Parameter payload stored inside the job descriptor.
	struct JobParams_t
	{
		int m_testInt_IN;
		int m_testInt_OUT;
	};

	// Returns the JobParams_t that VjobGetJobParams locates inside the descriptor pJob.
	inline JobParams_t * GetJobParams( void *pJob )
	{
		JobParams_t * const pParams = VjobGetJobParams< JobParams_t, JobDescriptor_t >( pJob );
		return pParams;
	}
}
#endif

View File

@ -0,0 +1,282 @@
//========== Copyright (c) Valve Corporation, All rights reserved. ========
#include "vjobs/pcring.h"
#include "vjobs/edgegeom_shared.h"
#ifdef SPU
#if EDGEGEOMRING_DEBUG_TRACE
#include "vjobs/edgegeomparams_shared.h" // debug
namespace job_edgegeom{ extern JobParams_t * g_lsJobParams; }
#endif
void CEdgeGeomRing::Test()
{
for( ;; )
{
cellDmaGetllar( this, m_eaThis, 0, 0 );
//if( 0 == __builtin_expect( spu_readch( MFC_RdAtomicStat ), 0 ) )
uint nStatusGetllar = cellDmaWaitAtomicStatus();(void)nStatusGetllar;
#if EDGEGEOMRING_DEBUG_TRACE
m_nUseCounter++;
#endif
cellDmaPutllc( this, m_eaThis, 0, 0 );
uint nStatusPutllc = cellDmaWaitAtomicStatus();
if( 0 == __builtin_expect( nStatusPutllc, 0 ) )
{
break; // succeeded
}
//VjobSpuLog("job_edgegeom Test failed(%d,%d)\n", nStatusGetllar, nStatusPutllc );
}
}
// 16-byte record of the ring state observed between two atomic attempts.
// CEdgeGeomRing::Allocate overlays it with a vector in a union, so the four
// fields must remain exactly four 32-bit words in this order.
struct ALIGN16 FifoSnapshot_t
{
	uint32 m_nSignal;          // value read from the ring label
	uint32 m_nPut;             // m_ibvbRing.m_nPut at snapshot time
	uint32 m_nEnd;             // m_ibvbRing.m_nEnd at snapshot time
	uint32 m_nRingIncarnation; // m_nRingIncarnation at snapshot time

	// Copies put/end/incarnation from pRing, then fetches the current label value
	// with a synchronous DMA (we are spinning, so the signal must be refreshed).
	void Snapshot( CEdgeGeomRing * pRing )
	{
		const SysFifo &fifo = pRing->m_ibvbRing;
		m_nPut = fifo.m_nPut;
		m_nEnd = fifo.m_nEnd;
		m_nRingIncarnation = pRing->m_nRingIncarnation;

		const uintp eaLabel = uintp( pRing->m_eaIbvbRingLabel );
		m_nSignal = cellDmaGetUint32( eaLabel, DMATAG_SYNC, 0, 0 );
	}
}
ALIGN16_POST;
// SPU-side allocation from the shared index/vertex buffer ring.
//
// pGcmCtx         - command context; receives a vertex cache invalidate if the ring wrapped
// nBytesUnaligned - requested size; rounded up to a multiple of 32 bytes
// nQueueTag       - job queue tag; indexes m_ibvbRingSignal / m_nMaxJobId
//
// Returns the address (m_ibvbRing.EaPut()) of the allocated range. The function spins
// until both the ring has room and the atomic getllar/putllc update of this object succeeds.
//
// Every loop iteration re-fetches this object with getllar, so any member modified in an
// iteration whose putllc fails is discarded; only locals survive between iterations.
uintp CEdgeGeomRing::Allocate( CellGcmContextData *pGcmCtx, uint nBytesUnaligned, uint nQueueTag )
{
	// allocate in aligned chunks to make it all aligned
	uint nBytesAligned = AlignValue( nBytesUnaligned, 32 );
	AssertSpuMsg( nBytesAligned <= EDGEGEOMRING_MAX_ALLOCATION, "job_edgegeom allocates %u > %u from edge", nBytesAligned, EDGEGEOMRING_MAX_ALLOCATION );
	uintp eaAllocation = 0;
	SysFifo::PreparePutEnum_t nResult = SysFifo::PUT_PREPARE_FAILED;
	uint nStatusGetllar, nStatusPutllc;
	// local statistics; folded into the shared counters by the iteration that commits
	uint nSpins = 0, nAtomicCollisionEvent = 0, nWaitRsxSpins = 0;
	uint nStoredSignal;
	uint nSpuFlag = 1 << VjobSpuId();
	union
	{
		FifoSnapshot_t fields;
		__vector int vi4;
	}snapshot;
	// all-ones can never match a valid m_nPut (asserted below), so the first iteration never notifies
	snapshot.vi4 = (__vector int){-1,-1,-1,-1};
	uint32 nJobId = job_edgegeom::g_lsJobParams->m_nEdgeJobId;
	for(;; nSpins ++)
	{
		cellDmaGetllar( this, m_eaThis, 0, 0 );
		//if( 0 == __builtin_expect( spu_readch( MFC_RdAtomicStat ), 0 ) )
		nStatusGetllar = cellDmaWaitAtomicStatus();
		{
			// reservation succeeded
			Assert( m_ibvbRing.m_nPut != 0xFFFFFFFF );
			if( snapshot.fields.m_nPut == m_ibvbRing.m_nPut && snapshot.fields.m_nRingIncarnation == m_nRingIncarnation )
			{
				// the put didn't change, ring incarnation didn't change.
				// Therefore, nobody changed this object - between
				// last getllar, getting signal and this getllar,
				// so it's atomic if we update the signal now.
				m_ibvbRing.NotifySignalSafe( snapshot.fields.m_nSignal );
			}
			nResult = m_ibvbRing.PreparePut( nBytesAligned );
			if( nResult != SysFifo::PUT_PREPARE_FAILED )
			{
				eaAllocation = m_ibvbRing.EaPut();
				m_ibvbRing.Put( nBytesAligned );
				nStoredSignal = m_ibvbRing.GetSignal();
				m_ibvbRingSignal[nQueueTag] = nStoredSignal;
				m_nAtomicCollisionSpins += nAtomicCollisionEvent;
				m_nRsxWaitSpins += nWaitRsxSpins;
				m_nUsedSpus |= nSpuFlag;
				// wrap-safe "newer than" comparison of job ids
				if( ( ( signed int )( nJobId - m_nMaxJobId[nQueueTag] ) ) > 0 )
				{
					m_nMaxJobId[nQueueTag] = nJobId;
				}
				if( nResult == SysFifo::PUT_PREPARED_WRAPPED )
				{
					m_nRingIncarnation++; // we allocated, wrapping
				}
#if EDGEGEOMRING_DEBUG_TRACE
				m_nUseCounter++;
				COMPILE_TIME_ASSERT( !( EDGEGEOMRING_DEBUG_TRACE & ( EDGEGEOMRING_DEBUG_TRACE - 1 ) ) );
				m_nNextDebugTrace = ( m_nNextDebugTrace + 1 ) & ( EDGEGEOMRING_DEBUG_TRACE - 1 );
#endif
				cellDmaPutllc( this, m_eaThis, 0, 0 );
				nStatusPutllc = cellDmaWaitAtomicStatus();
				if( 0 == __builtin_expect( nStatusPutllc, 0 ) )
				{
					break; // succeeded
				}
				// the conditional store failed: somebody else touched the line between our
				// getllar and putllc. Count it so m_nAtomicCollisionSpins reflects real
				// collisions (previously this counter was never incremented).
				nAtomicCollisionEvent ++;
			}
			else
			{
				// no room in the ring yet
				nWaitRsxSpins ++;
			}
		}
		// refresh the snapshot (including the label value) before the next attempt
		snapshot.fields.Snapshot( this );
		if( nSpins == 100000 && !IsCert() )
		{
			// VjobSpuLog( "job_edgegeom Allocate spinning: %d, %d, signal 0x%X;\n", nStatusGetllar, nStatusPutllc, nLastSeenSignal );
			// DebuggerBreak();
		}
	}
	if( nResult == SysFifo::PUT_PREPARED_WRAPPED )
	{
		// need to clear cache
		cellGcmSetInvalidateVertexCacheInline( pGcmCtx );
		//VjobSpuLog( "job_edgegeom Allocate wrapped ring, invalidated vertex cache\n" );
	}
	else
	{
		Assert( nResult == SysFifo::PUT_PREPARED_NOWRAP );
	}
	Assert( nStoredSignal == m_ibvbRing.GetSignal() );
	//VjobSpuLog( "alloc %X, signal %X, prev6 signal:%X, pcring put %x end %x\n", eaAllocation, nStoredSignal, m_ibvbRingSignal[(nTag-1)&3], m_ibvbRing.m_nPut, m_ibvbRing.m_nEnd );
#if EDGEGEOMRING_DEBUG_TRACE
	if( m_eaDebugTrace && m_enableDebugTrace )
	{
		// record this allocation in the debug trace buffer at slot m_nNextDebugTrace
		EdgeGeomDebugTrace_t trace;
		trace.m_nAllocResult = (uint8)nResult;
		trace.m_nQueueTag = (uint8)job_edgegeom::g_lsJobParams->m_nQueueTag;
		trace.m_nJobId = job_edgegeom::g_lsJobParams->m_nEdgeJobId;
		trace.m_nPut = m_ibvbRing.m_nPut;
		trace.m_nEnd = m_ibvbRing.m_nEnd;
		trace.m_eaEdgeGeomJts = job_edgegeom::g_lsJobParams->m_eaEdgeGeomJts;
		for( uint i = 0; i < EDGEGEOMRING_JOBQUEUE_TAG_COUNT; ++i )
			trace.m_nTagSignal[i] = m_ibvbRingSignal[i];
		VjobDmaPutf( &trace, uintp( m_eaDebugTrace + m_nNextDebugTrace ), sizeof( trace ), VJOB_IOBUFFER_DMATAG, 0, 0 );
		VjobWaitTagStatusAll( 1 << VJOB_IOBUFFER_DMATAG );
	}
#endif
	return eaAllocation;
}
#else
void CEdgeGeomRing::Init( void* eaBuffer, uint nBufferSize, uint nIoOffsetDelta, void * eaLocalBaseAddress, uint nLabel )
{
COMPILE_TIME_ASSERT( sizeof( CEdgeGeomRing_Mutable ) <= 128 ); // we need to fit into 128 bytes so that atomics work
m_ibvbRing.Init( (uintp)eaBuffer, nBufferSize );
m_eaLocalBaseAddress = (uint) eaLocalBaseAddress;
m_nIoOffsetDelta = nIoOffsetDelta;
m_nIbvbRingLabel = nLabel;
m_eaIbvbRingLabel = cellGcmGetLabelAddress( nLabel );
*m_eaIbvbRingLabel = m_ibvbRing.GetSignal();
m_ibvbRingSignal[0] = m_ibvbRing.GetSignal();
for( uint i = 0; i < EDGEGEOMRING_JOBQUEUE_TAG_COUNT; ++i )
{
m_ibvbRingSignal[i] = m_ibvbRingSignal[0];
}
V_memset( m_nMaxJobId, 0xFF, sizeof( m_nMaxJobId ) );
m_eaThis = (uint) this;
m_nDebuggerBreakMask = 0;
m_nAtomicCollisionSpins = 0;
m_nRsxWaitSpins = 0;
m_nRingIncarnation = 0;
#if EDGEGEOMRING_DEBUG_TRACE
m_nUseCounter = 0;
m_eaDebugTrace = NULL;
m_eaDebugTrace = ( EdgeGeomDebugTrace_t* )MemAlloc_AllocAligned( sizeof( EdgeGeomDebugTrace_t ) * EDGEGEOMRING_DEBUG_TRACE, 16 * 16 * 16 );
m_nNextDebugTrace = 0;
m_enableDebugTrace = true;
#endif
}
// Releases the debug trace buffer allocated by Init() (only when the debug trace is
// compiled in). The pointer is cleared and tracing disabled afterwards so that a
// repeated Shutdown() does not free the buffer twice and no stale pointer remains.
void CEdgeGeomRing::Shutdown()
{
#if EDGEGEOMRING_DEBUG_TRACE
	if( m_eaDebugTrace )
	{
		m_enableDebugTrace = false;
		MemAlloc_FreeAligned( m_eaDebugTrace );
		m_eaDebugTrace = NULL;
	}
#endif
}
// Resets the feeder: remembers the ring size and zeroes the tag and all counters.
void CEdgeGeomFeeder::Init( uint nIbvbRingSize )
{
	// configuration
	m_nIbvbRingSize = nIbvbRingSize;

	// current tag state
	m_nJobQueueTag = 0;
	m_nSpawnedJobsWithTag = 0;
	m_nSpawnedJobsWithTagReserveAllocate = 0;

	// lifetime statistics
	m_nTotalEdgeGeomJobCounter = 0;
}
// Non-SPU variant of Test(): no atomic round trip, it only bumps the use counter
// when the debug trace is compiled in.
void CEdgeGeomRing::Test()
{
#if EDGEGEOMRING_DEBUG_TRACE
	++m_nUseCounter;
#endif
}
// Non-SPU emulation of the ring allocation.
// This is not an actively supported and tested code path! It only exists for
// single-threaded PPU-on-SPU mode debugging, which is why it breaks into the debugger
// and warns on entry. The getllar/putllc pair of the SPU version is emulated with
// plain memory copies from/to m_eaThis.
uintp CEdgeGeomRing::Allocate( CellGcmContextData *pGcmCtx, uint nBytesUnaligned, uint nQueueTag )
{
	DebuggerBreak();
	Warning( "this is not an actively supported and tested code path! It only exists here for single-threaded PPU-on-SPU mode debugging. Bit rot possible\n" );

	// allocate in aligned chunks to make it all aligned
	uint nAllocBytes = AlignValue( nBytesUnaligned, 32 );
	AssertSpuMsg( nAllocBytes <= EDGEGEOMRING_MAX_ALLOCATION, "job_edgegeom allocates %u > %u from edge", nAllocBytes, EDGEGEOMRING_MAX_ALLOCATION );

	uint nObservedSignal = m_ibvbRing.GetInvalidSignal();
	for(;;)
	{
		// emulate getllar: reload the object
		V_memcpy( this, (void*)m_eaThis, sizeof( *this ) );

		// feed the last label value we saw (if any) into the fifo
		if( nObservedSignal != m_ibvbRing.GetInvalidSignal() )
		{
			m_ibvbRing.NotifySignal( nObservedSignal );
		}

		SysFifo::PreparePutEnum_t ePrepared = m_ibvbRing.PreparePut( nAllocBytes );
		if( ePrepared == SysFifo::PUT_PREPARE_FAILED )
		{
			// no room: refresh the signal, since we're spinning, and try again
			nObservedSignal = VjobDmaGetUint32( uintp( m_eaIbvbRingLabel ), DMATAG_SYNC, 0, 0 );
			continue;
		}

		if( ePrepared == SysFifo::PUT_PREPARED_WRAPPED )
		{
			// the ring wrapped: need to clear cache
			cellGcmSetInvalidateVertexCacheInline( pGcmCtx );
		}

		uintp eaResult = m_ibvbRing.EaPut();
		m_ibvbRing.Put( nAllocBytes );
		m_ibvbRingSignal[nQueueTag] = m_ibvbRing.GetSignal();

		// emulate putllc: store the object back
		V_memcpy( (void*)m_eaThis, this, sizeof( *this ) );
		return eaResult;
	}
}
#endif

View File

@ -0,0 +1,130 @@
//========== Copyright (c) Valve Corporation, All rights reserved. ========
#if !defined( JOB_EDGE_GEOM_SHARED_HDR ) && defined( _PS3 )
#define JOB_EDGE_GEOM_SHARED_HDR
#include "ps3/spu_job_shared.h"
#include "vjobs/pcring.h"
#define EDGEGEOMRING_DEBUG_TRACE 128 // set to 0 to disable debug trace
// the max allocation, in bytes, made from job_edgegeom
#define EDGEGEOMRING_MAX_ALLOCATION ( 32 * 1024 )
#define EDGEGEOMRING_JOBQUEUE_TAG_COUNT 4
// One record of the allocation debug trace. CEdgeGeomRing::Allocate fills one of these
// per allocation and DMAs it into the m_eaDebugTrace array when tracing is enabled.
struct ALIGN16 EdgeGeomDebugTrace_t
{
uint8 m_nAllocResult; // allocation result (SysFifo::PreparePutEnum_t) cast to 8 bits
uint8 m_nQueueTag; // queue tag from the job params, cast to 8 bits
uint16 m_nJobId; // NOTE(review): assigned from the 32-bit m_nEdgeJobId, so only the low 16 bits survive
uint32 m_eaEdgeGeomJts; // m_eaEdgeGeomJts from the job params
uint32 m_nPut; // ring put value after the allocation
uint32 m_nEnd; // ring end value after the allocation
uint32 m_nTagSignal[EDGEGEOMRING_JOBQUEUE_TAG_COUNT]; // copy of the per-tag ring signals
}ALIGN16_POST;
// The part of the ring state that is modified by allocations. CEdgeGeomRing::Init
// compile-time asserts that this struct fits into 128 bytes "so that atomics work".
struct ALIGN16 CEdgeGeomRing_Mutable
{
SysFifo m_ibvbRing; // edge geom index buffer/vertex buffer ring
// WARNING. Although logically there may be any (2^x) number of queue tags for edgegeom jobs,
// The SPURS must only see 0 and 1 (even and odd) tags. Even tags must synchronize with even,
// odd tags must synchronize with odd.
uint32 m_ibvbRingSignal[EDGEGEOMRING_JOBQUEUE_TAG_COUNT]; // ring signal stored by the last allocation made with each tag
uint32 m_nMaxJobId[EDGEGEOMRING_JOBQUEUE_TAG_COUNT]; // newest job id seen per tag (wrap-safe compare); Init fills it with 0xFF bytes
uint m_nAtomicCollisionSpins; // statistics: accumulated from Allocate's collision counter
uint m_nRsxWaitSpins; // statistics: iterations Allocate spent with no room in the ring
uint m_nRingIncarnation; // incremented every time an allocation wraps the ring
uint m_nUsedSpus; // bit mask, bit VjobSpuId() is set by every SPU that allocated
#if EDGEGEOMRING_DEBUG_TRACE
uint m_nNextDebugTrace; // index of the next debug trace slot, wraps at EDGEGEOMRING_DEBUG_TRACE
#endif
}
ALIGN16_POST;
// Ring allocator for edge geometry output. The mutable state lives in the base struct;
// the members below are set up by Init() (m_nUseCounter is additionally bumped by
// Test()/Allocate() when the debug trace is compiled in).
struct ALIGN128 CEdgeGeomRing: public CEdgeGeomRing_Mutable
{
// immutable part
uint m_eaLocalBaseAddress; // eaLocalBaseAddress passed to Init, stored as a 32-bit value
uint m_nIoOffsetDelta; // nIoOffsetDelta passed to Init
uint m_nIbvbRingLabel; // GCM label index passed to Init
uint32 * m_eaIbvbRingLabel; // address of that label (cellGcmGetLabelAddress)
uint m_eaThis; // address of this object; target of the atomic get/put in Test/Allocate
uint m_nDebuggerBreakMask; // zeroed by Init; not otherwise used in this file
#if EDGEGEOMRING_DEBUG_TRACE
uint m_nUseCounter; // debug: number of Test/Allocate updates
EdgeGeomDebugTrace_t *m_eaDebugTrace; // debug: trace buffer of EDGEGEOMRING_DEBUG_TRACE records
bool m_enableDebugTrace; // debug: trace records are only written while this is set
#endif
public:
// Allocates nBytes (rounded up to 32) from the ring for a job using queue tag nTag; returns the allocation address.
uintp Allocate( CellGcmContextData *pGcmCtx, uint nBytes, uint nTag );
// Exercises the update path of this object (see the SPU / non-SPU implementations).
void Test();
#ifndef SPU
// Sets up the ring over [eaBuffer, eaBuffer + nBufferSize) and binds it to GCM label nLabel.
void Init( void* eaBuffer, uint nBufferSize, uint nIoOffsetDelta, void * eaLocalBaseAddress, uint nLabel );
// Frees what Init allocated (the debug trace buffer, when compiled in).
void Shutdown();
#endif
}
ALIGN128_POST;
// Compile-time constants for the edge geometry job.
enum EdgeGeomJobConstEnum_t
{
EDGEGEOMJOB_SCRATCH_SIZE = 64 * 1024 // 64 KB; presumably the job's scratch buffer size - confirm against the job setup code
};
// Bookkeeping for spawning edge geometry jobs: decides which queue tag the next job
// uses and when to switch to the next tag, based on how much ring space was reserved.
struct CEdgeGeomFeeder
{
uint m_nJobQueueTag; // the tag we're spawning jobs with, currently
uint m_nSpawnedJobsWithTag; // number of jobs we spawned with the tag
// max bytes that the jobs spawned with current tag can allocate;
// must not exceed 1/8 of the full ring buffer size because there may be 4 full tag switches between setting label
uint m_nSpawnedJobsWithTagReserveAllocate;
uint m_nTotalEdgeGeomJobCounter; // total number of Tick() calls since Init()
uint m_nIbvbRingSize; // ring size passed to Init(); Tick() switches tags at 1/8 of it
// Accounts for one more job reserving nReserve bytes; returns true when the tag was switched.
bool Tick( uint nReserve );
#ifndef SPU
// Resets all counters and remembers the ring size.
void Init( uint nIbvbRingSize );
#endif
};
// Registers one more job that may allocate up to nReserve bytes from the ring.
// While the bytes reserved under the current tag stay within 1/8 of the ring size the
// job keeps the current tag and false is returned. Otherwise the feeder advances to
// the next tag (wrapping at EDGEGEOMRING_JOBQUEUE_TAG_COUNT), charges nReserve to the
// new tag and returns true.
inline bool CEdgeGeomFeeder::Tick( uint nReserve )
{
	m_nTotalEdgeGeomJobCounter++;
	m_nSpawnedJobsWithTag++;

	// tentatively add the reserve to the same tag..
	m_nSpawnedJobsWithTagReserveAllocate += nReserve;
	if( m_nSpawnedJobsWithTagReserveAllocate <= m_nIbvbRingSize / 8 )
	{
		return false;
	}

	// budget for this tag exceeded: move on to the next tag and start counting afresh
	m_nJobQueueTag = ( m_nJobQueueTag + 1 ) & ( EDGEGEOMRING_JOBQUEUE_TAG_COUNT - 1 );
	m_nSpawnedJobsWithTag = 0;
	m_nSpawnedJobsWithTagReserveAllocate = nReserve;
	return true;
}
// Flag bits for the edge geometry job.
// NOTE(review): presumably stored in job_edgegeom::JobParams_t::m_nFlags - confirm against the job code.
namespace job_edgegeom
{
enum FlagEnum_t
{
FLAG_SWITCH_JOBQUEUE_TAG = 0x80, // by name: this job switches the job queue tag
FLAG_SKIP_VERTEX_CACHE_INVALIDATE = 0x40000000 // by name: do not invalidate the vertex cache
};
}
#endif

View File

@ -0,0 +1,70 @@
//========== Copyright (c) Valve Corporation, All rights reserved. ========
#if !defined( VJOBS_EDGEGEOM_JOBPARAMS_SHARED_HDR ) && defined( _PS3 )
#define VJOBS_EDGEGEOM_JOBPARAMS_SHARED_HDR
#include "ps3/spu_job_shared.h"
#include "edge/geom/edgegeom_structs.h"
struct SpuGcmEdgeGeomParams_t;
// Parameters and job descriptor layout of the edge geometry job.
namespace job_edgegeom
{
// Parameter block of one edge geometry job. It is located inside the job descriptor
// by GetJobParams() below.
struct JobParams_t
{
uint32 m_nFlags;
uint32 m_eaEdgeGcmControl;
uint32 m_numEdgeIndices;
uint32 m_numEdgeVertices;
EdgeGeomViewportInfo m_edgeGeomViewportInfo;
EdgeGeomLocalToWorldMatrix m_edgeGeomLocalToWorldMatrix;
uint32 m_nAdjustOutputIndices; // this gets added to the output indices, to adjust for the skipped vertices
uint32 m_uiGcmCount;
uint32 m_nLocalMemoryIndexBuffer;
uint32 m_nCmdBufferHoleBytes;
uint32 m_nQueueTag; // job queue tag; recorded in the ring's debug trace
uint32 m_eaEdgeGeomJts; // recorded in the ring's debug trace
uint32 m_eaEdgeDmaInputBase;
uint32 m_nEdgeDmaInputIdx;
uint32 m_nEdgeDmaInputVtx;
uint32 m_nEdgeDmaInputEnd;
uint32 m_nEdgeDmaIoBufferSize;
uint32 m_nExecutedOnSpu; // this must be 0xFFFF FFFF before it's executed, and the SpuId when it's executed
uint32 m_nEdgeJobId; // job id; the ring keeps the newest id per queue tag
uint16 m_uiMarkupVersionFlags;
uint8 m_uiGcmMode;
uint8 m_uiCullFlavor;
};
// Custom-sized job descriptor: SPURS header, a small DMA list / user data area and
// enough trailing bytes to hold a JobParams_t.
struct ALIGN128 JobDescriptor_t
{
CellSpursJobHeader header;
enum { DMA_LIST_CAPACITY = 1 + 4 + 4 + 4 + 1 };
union {
uint64_t dmaList[DMA_LIST_CAPACITY];
uint64_t userData[DMA_LIST_CAPACITY];
} workArea;
// pad it so that params END exactly at the end of the structure, this leaves the maximum safety slack
// between the params and DMA list just in case the DMA list overflows
uint8 paddingToMakeJobDescriptorBigEnough[ sizeof(JobParams_t) ];
//uint8 padding[ 127 & ~( sizeof( CellSpursJobHeader ) + sizeof( uint64 ) * DMA_LIST_CAPACITY ) ];
//JobParams_t params;
}ALIGN128_POST;
// Returns the JobParams_t that VjobGetJobParams locates inside the descriptor pJob.
inline JobParams_t * GetJobParams( void *pJob )
{
COMPILE_TIME_ASSERT( sizeof( JobDescriptor_t ) <= 896 ); // the absolute maximum for the job descriptor
return VjobGetJobParams< JobParams_t, JobDescriptor_t >( pJob );
}
}
#endif

View File

@ -0,0 +1,128 @@
//========== Copyright (c) Valve Corporation, All rights reserved. ========
#if !defined( JOB_FPCPATCH_SHARED_HDR ) && defined( _PS3 )
#define JOB_FPCPATCH_SHARED_HDR
#include "ps3/spu_job_shared.h"
#include "ps3/ps3_gcm_config.h"
// Shared definitions for the fragment program constant patching job.
namespace job_fpcpatch
{
// On PS/3, fragment programs have a maximum of 256 constant patch-ups that can be applied to each shader
// PS 2.0 defines 96 as minimum, but I'm not sure how many we are actually using
enum GlobalConstEnum_t
{
MAX_VIRTUAL_CONST_COUNT = MAX_FPCP_VIRTUAL_CONST_COUNT,
FLAG_PUT_STATE = 1, // DMA the new state out when done; must be synchronized by vjobs code running at PPU
FLAG_BREAK_JOB = 2,
FLAG_DEFER_STATE = 4,
FLAG_UNDEFER_STATE = 8 // this flag must fit in the lower 4 bits, because deferred state may be 16-byte aligned, not 128-byte aligned
};
// Header of a constant range: the same 16 bytes viewed either as one vector or as
// a start/count pair in the first two 32-bit words.
union ConstRangeHeader_t
{
fltx4 m_f4;
struct
{
uint32 m_nStart;
uint32 m_nCount;
} m_u32;
};
// Header part of the patch state.
struct ALIGN16 FpcPatchStateHeader_t
{
#ifndef SPU
volatile // there's no need to treat this as volatile on SPU
#endif
uint32 m_nStartRanges; // the start index of ConstRangeHeader_t
uint32 m_nBufferMask; // the number of Qwords in the buffer - 1
FpcPatchStateHeader_t * m_eaThis;
uint32 m_nThisStatePatchCounter; // the patch counter corresponding to this state (the job at which it was up to date)
uint32 m_eaThisStateJobDescriptor;
uint32 m_nDebuggerBreak;
}
ALIGN16_POST;
// Full patch state: the header followed by the virtual constant registers.
struct ALIGN128 FpcPatchState_t: FpcPatchStateHeader_t
{
// virtual const register states
fltx4 m_reg[MAX_VIRTUAL_CONST_COUNT];
// The buffer is laid out immediately after this object in memory.
fltx4 * GetBufferStart()
{
return ( fltx4* )( this + 1 ) ; // the buffer start address
}
}
ALIGN128_POST;
}
// Second flavour of the fragment program patch job. It re-uses the constants and
// state types of job_fpcpatch and adds the fragment program header layout.
namespace job_fpcpatch2
{
	using job_fpcpatch::MAX_VIRTUAL_CONST_COUNT;
	using job_fpcpatch::FLAG_PUT_STATE;
	using job_fpcpatch::FLAG_BREAK_JOB;
	using job_fpcpatch::FLAG_DEFER_STATE;
	using job_fpcpatch::FLAG_UNDEFER_STATE;
	using job_fpcpatch::ConstRangeHeader_t;
	using job_fpcpatch::FpcPatchStateHeader_t;
	using job_fpcpatch::FpcPatchState_t;

	// Header of a fragment program blob. In memory it is followed by:
	//   1. the microcode (m_nUcodeSize bytes),
	//   2. the patch table (m_nPatchCount 32-bit entries),
	//   3. the tex control table, starting at the patch table address plus the
	//      patch table size rounded up to 16 bytes.
	struct ALIGN16 FpHeader_t
	{
		uint32 m_nUcodeSize;
		// patches follow Ucode; patch is struct{ uint16 nConstIndex; uint16 nConstOffset; }
		// the offset is a qword index ( offset from the start of Ucode div 16 )
		uint32 m_nPatchCount;
		uint32 m_nShaderControl0;
		uint32 m_nTexControls; // Always <= 16; 1 tex control corresponds to 2 words in the tex control table

		// Size of header + microcode + patch table, i.e. the dma size without the texcontrols.
		uint GetDmaSize() const
		{
			return sizeof( FpHeader_t ) + m_nUcodeSize + m_nPatchCount * sizeof( uint32 );
		}

		// Address of the microcode for a header located at eaFpHeader.
		static uintp GetUcodeEa( uint eaFpHeader )
		{
			return sizeof( FpHeader_t ) + eaFpHeader;
		}

		// Address of the patch table for a header located at eaFpHeader.
		uintp GetPatchTableEa( uint eaFpHeader )const
		{
			const uintp eaUcode = GetUcodeEa( eaFpHeader );
			return eaUcode + m_nUcodeSize;
		}

		// Address of the tex control table for a header located at eaFpHeader.
		uintp GetTexControlsEa( uint eaFpHeader )const
		{
			const uintp eaPatchTable = GetPatchTableEa( eaFpHeader );
			return eaPatchTable + AlignValue( m_nPatchCount * sizeof( uint32 ), 16 );
		}

		// Size of the tex control table in bytes (two 32-bit words per tex control).
		uintp GetTexControlsBytes( )const
		{
			return sizeof( uint32 ) * 2 * m_nTexControls;
		}

#if !defined( SPU )
		// Pointer flavours of the accessors above, for a header that is directly addressable.
		const void * GetUcode()const
		{
			return ( void* )GetUcodeEa( ( uintp ) this );
		}

		const uint32 * GetPatchTable( )const
		{
			const uintp eaUcode = uintp( GetUcode() );
			return ( uint32* )( eaUcode + m_nUcodeSize );
		}

		const uint32 * GetTexControls()const
		{
			return ( uint32* )GetTexControlsEa( ( uintp )this );
		}
#endif
	}
	ALIGN16_POST;
}
#endif

View File

@ -0,0 +1,56 @@
//========= Copyright (c) 1996-2008, Valve Corporation, All rights reserved. ============//
#ifndef VJOBS_IBMARKUP_SHARED_HDR
#define VJOBS_IBMARKUP_SHARED_HDR
namespace OptimizedModel
{
//
// On PS3 index buffer is laid out very specially:
//
// Header:
// 0xFFFE 0xFFFE 0xFFFE 0xFFFE
//
// (the header cookie below is exactly these four 16-bit values)
//
#ifdef _WIN32
#pragma warning( push )
#pragma warning( disable : 4200 )
#endif
// Markup block describing the PS3 index buffer: a fixed header followed by
// m_partitions[], a zero-length (flexible) array, which is why warning 4200 is
// disabled around this struct on Windows compilers.
// NOTE(review): presumably m_numPartitions is the number of m_partitions entries and
// m_numBytesMarkup the total byte size of the markup - confirm against the writer.
struct OptimizedIndexBufferMarkupPs3_t
{
static const uint64 kHeaderCookie = 0xFFFEFFFEFFFEFFFEull;
static const uint16 kVersion1 = 0x0001;
uint64 m_uiHeaderCookie;
uint16 m_uiVersionFlags;
uint16 m_numBytesMarkup;
uint32 m_numPartitions;
uint32 m_numIndicesTotal;
uint32 m_numVerticesTotal;
uint32 m_nEdgeDmaInputOffsetPerStripGroup;
uint32 m_nEdgeDmaInputSizePerStripGroup;
// Per-partition record.
struct Partition_t
{
uint32 m_numIndicesToSkipInIBs;
uint32 m_numVerticesToSkipInVBs;
uint32 m_nIoBufferSize;
uint32 m_numIndices;
uint32 m_numVertices;
uint32 m_nEdgeDmaInputIdx;
uint32 m_nEdgeDmaInputVtx;
uint32 m_nEdgeDmaInputEnd;
};
Partition_t m_partitions[0];
};
#ifdef _WIN32
#pragma warning( pop )
#endif
}
#endif

View File

@ -0,0 +1,312 @@
//========== Copyright (c) Valve Corporation, All rights reserved. ========
#if !defined( VJOBS_JOBPARAMS_SHARED_HDR ) && defined( _PS3 )
#define VJOBS_JOBPARAMS_SHARED_HDR
#include "ps3/spu_job_shared.h"
// these structures belong in their own headers in public/vjobs, but they're small and I don't want to pollute public with such trivialities
// Parameters of the ctxflush job.
namespace job_ctxflush
{
	// Parameter payload stored inside the job descriptor.
	struct JobParams_t
	{
		uint32 m_nUsefulCmdBytes;
		uint32 m_nNewPcbringEnd;
	};

	// Returns the JobParams_t that VjobGetJobParams locates inside the
	// 128-byte SPURS job descriptor pJob.
	inline JobParams_t * GetJobParams( void *pJob )
	{
		JobParams_t * const pParams = VjobGetJobParams< JobParams_t, CellSpursJob128 >( pJob );
		return pParams;
	}
}
// Descriptor type and parameters of the gcmstateflush job.
namespace job_gcmstateflush
{
	// The job is carried in a 128-byte SPURS job descriptor.
	typedef CellSpursJob128 JobDescriptor_t;

	// Parameter payload stored inside the job descriptor.
	struct JobParams_t
	{
		uint32 m_nSpuDrawQueueSignal;
		uint16 m_nSizeofDrawQueueUploadWords; // this may be unaligned, and it counts bytes from the unaligned start
		uint16 m_nSkipDrawQueueWords;
	};

	// Returns the JobParams_t that VjobGetJobParams locates inside the descriptor pJob.
	inline JobParams_t * GetJobParams( void *pJob )
	{
		JobParams_t * const pParams = VjobGetJobParams< JobParams_t, JobDescriptor_t >( pJob );
		return pParams;
	}
}
// Data exchanged with the "hello" job.
namespace job_hello
{
// 128-byte aligned exchange block.
// NOTE(review): the field meanings below are inferred from their names only - confirm against the job code.
struct ALIGN128 Exchange_t
{
uint32 m_numSpusJoined; // presumably the number of SPUs that joined so far
uint32 m_nStage; // presumably the current stage of the exchange
uint64 m_nIncrementer[2];
} ALIGN128_POST;
}
// Descriptor type, phases and parameters of the zpass job.
namespace job_zpass
{
	// The job is carried in a 128-byte SPURS job descriptor.
	typedef CellSpursJob128 JobDescriptor_t;

	// Phase identifiers (values 0, 1, 2 in declaration order).
	enum ConstEnum_t
	{
		PHASE_ZPREPASS,
		PHASE_RENDER,
		PHASE_END
	};

	// Parameter payload stored inside the job descriptor.
	struct JobParams_t
	{
		uint8 m_nPhase;
		uint8 m_nDebuggerBreak;
	};

	// Returns the JobParams_t that VjobGetJobParams locates inside the descriptor pJob.
	inline JobParams_t * GetJobParams( void *pJob )
	{
		JobParams_t * const pParams = VjobGetJobParams< JobParams_t, JobDescriptor_t >( pJob );
		return pParams;
	}
}
struct CellMP3Context;
// Edge Zlib compression job
// Descriptor type and parameters of the Edge Zlib compression job.
namespace job_zlibdeflate
{
	// The job is carried in a 128-byte SPURS job descriptor.
	typedef CellSpursJob128 JobDescriptor_t;

	// Parameter payload stored inside the job descriptor.
	struct JobParams_t
	{
		// Reads m_nStatus through a volatile pointer, so every call performs a fresh
		// load; the result is non-0 once the job is done.
		uint16 IsDone()const
		{
			const volatile uint16 * const pStatus = &m_nStatus;
			return *pStatus;
		}

		void * m_eaOutputCompressedData;
		uint32 m_nMaxCompressedOutputSize;
		void * m_eaInputUncompressedData;
		uint32 m_nUncompressedSize;
		uint32 m_nError;
		// 0 : compressed data was larger than uncompressed or compression error, store uncompressed
		// the MSB is set when data is compressed
		uint32 m_nCompressedSizeOut;
		uint16 m_nStatus; // will be non-0 when the job is done
		uint16 m_nDebuggerBreak;
	};

	// Returns the JobParams_t that VjobGetJobParams locates inside the descriptor pJob.
	inline JobParams_t * GetJobParams( void *pJob )
	{
		JobParams_t * const pParams = VjobGetJobParams< JobParams_t, JobDescriptor_t >( pJob );
		return pParams;
	}
}
// Edge Zlib decompression job
// Descriptor type and parameters of the Edge Zlib decompression job.
namespace job_zlibinflate
{
	// The job is carried in a 128-byte SPURS job descriptor.
	typedef CellSpursJob128 JobDescriptor_t;

	// Parameter payload stored inside the job descriptor.
	struct JobParams_t
	{
		// Reads m_nStatus through a volatile pointer, so every call performs a fresh
		// load; the result is non-0 once the job is done.
		uint16 IsDone()const
		{
			const volatile uint16 * const pStatus = &m_nStatus;
			return *pStatus;
		}

		void *m_eaUncompressedOutput;
		uint32 m_nExpectedUncompressedSize;
		void *m_eaCompressed;
		uint32 m_nCompressedSize;
		// 0 : decompressed without error
		uint32 m_nError;
		uint16 m_nStatus; // will be non-0 when the job is done
		uint16 m_nDebuggerBreak;
	};

	// Returns the JobParams_t that VjobGetJobParams locates inside the descriptor pJob.
	inline JobParams_t * GetJobParams( void *pJob )
	{
		JobParams_t * const pParams = VjobGetJobParams< JobParams_t, JobDescriptor_t >( pJob );
		return pParams;
	}
}
// Descriptor type and parameters of the edgemlaa job.
namespace job_edgemlaa
{
	// The job is carried in a 128-byte SPURS job descriptor.
	typedef CellSpursJob128 JobDescriptor_t;

	// Parameter payload stored inside the job descriptor.
	struct JobParams_t
	{
		uint32 m_nDebuggerBreakMask;
		uint32 *m_eaJts; // patch this with RETURN
	};

	// Returns the JobParams_t that VjobGetJobParams locates inside the descriptor pJob.
	inline JobParams_t * GetJobParams( void *pJob )
	{
		JobParams_t * const pParams = VjobGetJobParams< JobParams_t, JobDescriptor_t >( pJob );
		return pParams;
	}
}
// Descriptor type and parameters of the buildindices job.
namespace job_buildindices
{
	// The job is carried in a 128-byte SPURS job descriptor.
	typedef CellSpursJob128 JobDescriptor_t;

	// Parameter payload stored inside the job descriptor.
	struct JobParams_t
	{
		int m_testInt_IN;
		int m_testInt_OUT;
	};

	// Returns the JobParams_t that VjobGetJobParams locates inside the descriptor pJob.
	inline JobParams_t * GetJobParams( void *pJob )
	{
		JobParams_t * const pParams = VjobGetJobParams< JobParams_t, JobDescriptor_t >( pJob );
		return pParams;
	}
}
// Descriptor type and parameters of the buildrenderables job.
namespace job_buildrenderables
{
	// The job is carried in a 128-byte SPURS job descriptor.
	typedef CellSpursJob128 JobDescriptor_t;

	// Parameter payload stored inside the job descriptor.
	struct JobParams_t
	{
		int m_testInt_IN;
		int m_testInt_OUT;
	};

	// Returns the JobParams_t that VjobGetJobParams locates inside the descriptor pJob.
	inline JobParams_t * GetJobParams( void *pJob )
	{
		JobParams_t * const pParams = VjobGetJobParams< JobParams_t, JobDescriptor_t >( pJob );
		return pParams;
	}
}
// Descriptor type, parameters and output layout of the buildworldlists job.
namespace job_buildworldlists
{
// This job needs the larger, 256-byte SPURS job descriptor.
typedef CellSpursJob256 JobDescriptor_t;
// Parameter payload stored inside the job descriptor.
// NOTE(review): the uint32 members prefixed m_ea / m_p are presumably 32-bit main-memory
// addresses of the named engine structures - confirm against the job code.
struct JobParams_t
{
uint32 m_nDebugBreak;
uint32 m_eaWorldNodes;
int m_visframecount;
uint32 m_pSurfaces2;
uint32 m_pmarksurfaces;
uint32 m_pLeafs;
float m_ModelOrg[4];
bool m_bViewerInSolidSpace;
uint32 m_Disp_ParentSurfID_offset;
uint32 m_Disp_BB_offset;
uint32 m_Disp_Info_Size;
uint32 m_pDispInfos;
uint32 m_eaFrustum;
uint32 m_nAreaFrustum;
uint32 m_eaAreaFrustum;
uint32 m_eaRenderAreaBits;
uint32 m_eaDispInfoReferences;
uint32 m_nMaxVisitSurfaces;
uint32 m_nNumSortID;
bool m_bShadowDepth;
bool m_bDrawTopView;
bool m_bTopViewNoBackfaceCulling;
bool m_bTopViewNoVisCheck;
uint32 m_eaVolumeCuller;
float m_orthoCenter[2];
float m_orthoHalfDi[2];
int m_DrawFlags;
int m_buildViewID;
// inout
uint32 m_eaInfo;
uint32 m_eaRenderListLeaves;
// addr of output DMA structure
uint32 m_eaDMAOut;
// offset for CUtlVector Count
uint32 m_nUtlCountOffset;
};
// Output DMA structure (its address is passed in JobParams_t::m_eaDMAOut).
// Each group below lists the destinations for one output container; the *UtlPtr
// members accompany the corresponding data pointers.
struct ALIGN128 buildWorldListsDMAOut
{
// m_SortList
uint32 m_pSortList_m_list;
uint32 m_pSortList_m_groupsShared;
uint32 m_pSortList_m_groupIndices;
uint32 m_pSortList_m_sortGroupLists[4];
uint32 m_pSortList_m_listUtlPtr;
uint32 m_pSortList_m_groupsSharedUtlPtr;
uint32 m_pSortList_m_groupIndicesUtlPtr;
uint32 m_pSortList_m_sortGroupListsUtlPtr[4];
// m_DispSortList
uint32 m_pDispSortList_m_list;
uint32 m_pDispSortList_m_groupsShared;
uint32 m_pDispSortList_m_groupIndices;
uint32 m_pDispSortList_m_sortGroupLists[4];
uint32 m_pDispSortList_m_listUtlPtr;
uint32 m_pDispSortList_m_groupsSharedUtlPtr;
uint32 m_pDispSortList_m_groupIndicesUtlPtr;
uint32 m_pDispSortList_m_sortGroupListsUtlPtr[4];
// m_AlphaSurfaces
uint32 m_pAlphaSurfaces;
uint32 m_pAlphaSurfacesUtlPtr;
// m_DlightSurfaces
uint32 m_pDlightSurfaces[4];
uint32 m_pDlightSurfacesUtlPtr[4];
// m_PaintedSurfaces
uint32 m_pPaintedSurfaces[4];
uint32 m_pPaintedSurfacesUtlPtr[4];
// m_leaves
uint32 m_pLeaves;
uint32 m_pLeavesUtlPtr;
// m_VisitedSurfs
uint32 m_pVisitedSurfs;
// decal surf list
uint32 m_pDecalSurfsToAdd;
uint32 m_pDecalSurfsToAddUtlPtr;
// m_bSkyVisible
uint32 m_pSkyVisible;
// m_bWaterVisible
uint32 m_pWaterVisible;
} ALIGN128_POST;
// A surface id paired with a render group.
struct decalSurfPair
{
uint32 m_surfID;
int m_renderGroup;
};
// Returns the JobParams_t that VjobGetJobParams locates inside the descriptor pJob.
inline JobParams_t * GetJobParams( void *pJob )
{
return VjobGetJobParams< JobParams_t, JobDescriptor_t >( pJob );
}
}
#endif

View File

@ -0,0 +1,67 @@
//========== Copyright (c) Valve Corporation, All rights reserved. ========
#include "ps3/spu_job_shared.h"
#include "vjobs/mp3dec_shared.h"
// layer 3, version {2, 1} ( indexed by LSB of m_nAudioVersionId)
// Layer 3 bitrate table. First index: least significant bit of the audio version id
// (0 = version 2, 1 = version 1); second index: the 4-bit bitrate id from the frame
// header. Entries are the bitrate in kbps divided by 8; index 0 ("free") and
// index 15 ("bad") are stored as 0.
uint8 s_mp3_bitrate_8000[2][16] = // bitrate / 8000
{
{ // version 2, version id = 10b
0, // free
8 / 8,
16 / 8,
24 / 8,
32 / 8,
40 / 8,
48 / 8,
56 / 8,
64 / 8,
80 / 8,
96 / 8,
112 / 8,
128 / 8,
144 / 8,
160 / 8,
0// bad
},
{ // version 1, version id = 11b
0, // free
32 / 8,
40 / 8,
48 / 8,
56 / 8,
64 / 8,
80 / 8,
96 / 8,
112 / 8,
128 / 8,
160 / 8,
192 / 8,
224 / 8,
256 / 8,
320 / 8,
0 //bad
}
};
// Sampling rate table. First index: least significant bit of the audio version id
// (0 = version 2, 1 = version 1); second index: the 2-bit sampling rate id from the
// frame header. Entries are the sampling rate in Hz divided by 50; the reserved
// id 3 is stored as 0, so users of this table must not divide by that entry.
uint16 s_mp3_samplingrate_div50[2][4] =
{
{// version 2, version id = 10b
22050/50,
24000/50,
16000/50,
0 // reserved
},
{// version 1, version id = 11b
44100/50,
48000/50,
32000/50,
0
}
};
void job_mp3dec::Context_t::Init()
{
ZeroMemAligned( this, sizeof( *this ) );
}

View File

@ -0,0 +1,326 @@
//========== Copyright (c) Valve Corporation, All rights reserved. ========
#ifndef VJOBS_MP3DEC_SHARED_HDR
#define VJOBS_MP3DEC_SHARED_HDR
#include "ps3/spu_job_shared.h"
#ifdef SPU
#include "Mp3DecSpuLib.h"
#else
#include "mp3declib.h"
#endif
extern uint8 s_mp3_bitrate_8000[2][16];
extern uint16 s_mp3_samplingrate_div50[2][4];
// The 4-byte header that starts every MPEG audio frame, with accessors for its bit
// fields and helpers to compute the frame length. Only MPEG version 1 / version 2,
// Layer 3 headers are handled (see IsCorrectHeader).
//
// Bit layout of the four bytes (http://mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm):
//   AAAAAAAA AAABBCCD EEEEFFGH IIJJKLMM
//   A frame sync (all bits set)   B audio version   C layer       D protection
//   E bitrate id                  F sampling rate   G padding     H private bit
//   I channel mode                J mode extension  K copyright   L original
//   M emphasis
struct Mp3FrameHeader
{
	// http://www.mars.org/pipermail/mad-dev/2002-January/000425.html
	// The absolute theoretical maximum frame size is 2881 bytes: MPEG 2.5 Layer II,
	// 8000 Hz @ 160 kbps, with a padding slot
	enum ConstEnum_t{
		MAX_FRAME_LENGTH = 2881,
		// Theoretical frame sizes for Layer III range from 24 to 1441 bytes, but there
		// is a "soft" limit imposed by the standard of 960 bytes
		MAX_MP3_FRAME_LENGTH = 1441
	};

	// WARNING
	// A bitfield view of the header (kept below for reference) works in debugger perfectly,
	// but if you use it in the SPU code, it'll sometimes be wrong.
	// Don't use it for anything other than debugging.
	/*union
	{
	struct
	{
	uint m_nFrameSync :11; // all bits must be set at all times , or it's not really a frame header
	uint m_nAudioVersion : 2;
	uint m_nLayerDesc : 2;
	uint m_nProtection : 1;
	uint m_nBitrate : 4;
	uint m_nSamplingRate : 2;
	uint m_nPadding : 1;
	uint m_nPrivateBit : 1;
	uint m_nChannelMode : 2; // 01 (joint) isn't supported in mp3dec from Sony
	uint m_nModeExtension : 2;
	uint m_nCopyright : 1;
	uint m_nOriginal : 1;
	uint m_nEmphasis : 2;
	};
	*/
	uint8 m_bits[4];
	//};

	// Non-zero if the first two bytes look like a V1/V2 Layer 3 frame header.
	uint CheckSync()const{ return IsCorrectHeader( m_bits ); }

	// --- raw bit field accessors ---
	uint GetAudioVersionId()const
	{
		return ( m_bits[1] >> 3 ) & 3;
	}
	uint GetLayerDescId()const
	{
		return ( m_bits[1] >> 1 ) & 3;
	}
	uint GetProtection()const
	{
		return m_bits[1] & 1;
	}
	uint GetBitrateId()const
	{
		return m_bits[2] >> 4;
	}
	uint GetSamplingRateId()const
	{
		return ( m_bits[2] >> 2 ) & 3;
	}
	uint GetPadding()const
	{
		return ( m_bits[2] >> 1 ) & 1;
	}
	uint GetPrivateBit()const
	{
		return ( m_bits[2] ) & 1;
	}
	uint GetChannelModeId()const
	{
		return ( m_bits[3] >> 6 ) & 3;
	}
	uint GetModeExtensionId()const
	{
		return ( m_bits[3] >> 4 ) & 3;
	}
	uint GetCopyright()const
	{
		return ( m_bits[3] >> 3 ) & 1;
	}
	uint GetOriginal()const
	{
		return ( m_bits[3] >> 2 ) & 1;
	}
	uint GetEmphasisId()const
	{
		return ( m_bits[3] ) & 3;
	}

	// Computes the frame length in bytes, header included, from the bitrate and
	// sampling rate ids: 144 * bitrate / samplingrate for version 1 and
	// 72 * bitrate / samplingrate for version 2, plus the padding byte when
	// bUsePadding is set and the header's padding bit is on.
	//
	// Returns 0 when the header carries the reserved sampling rate id (table entry 0):
	// such a header has no defined length, and dividing by the table entry would be a
	// division by zero in builds where Assert compiles to nothing.
	inline uint GetFrameLengthIncludingHeader( bool bUsePadding = true )const
	{
		COMPILE_TIME_ASSERT( sizeof( *this ) == 4 );
		Assert( CheckSync() && GetAudioVersionId() >= 2 && GetLayerDescId() == 1 ); // version 1 or 2, layer 3
		// The tables store bitrate(bps) / 8000 and samplingrate(Hz) / 50, so
		// ( 144 * 8 * 20 ) * bitrate_8000 / samplingrate_50 == 144 * bitrate / samplingrate
		uint nAudioVersion = GetAudioVersionId() & 1, nBitrate = GetBitrateId(), nSamplingRateId = GetSamplingRateId();
		uint bitrate_8000 = s_mp3_bitrate_8000[ nAudioVersion ][ nBitrate ];
		uint samplingrate_50 = s_mp3_samplingrate_div50[ nAudioVersion ][ nSamplingRateId ];
		uint a;
		// TODO: Change the table so we don't have to do this test
		if ( nAudioVersion == 1 )
		{
			a = ( 144 * 8 * 20 ) * bitrate_8000;
		}
		else
		{
			a = ( 72 * 8 * 20 ) * bitrate_8000;
		}
		Assert( a > 0 && samplingrate_50 > 0 );
		if ( samplingrate_50 == 0 )
		{
			return 0; // reserved sampling rate id: no valid length, and we must not divide by zero
		}
		uint nLength = a / samplingrate_50;
		if ( bUsePadding )
		{
			nLength += GetPadding();
		}
		return nLength;
	}

	// Sampling rate in Hz (0 for the reserved sampling rate id).
	inline uint GetFrameSamplingRate() const
	{
		return s_mp3_samplingrate_div50[ GetAudioVersionId()& 1][ GetSamplingRateId() ] * 50;
	}

	// Bitrate in kbps (0 for the "free" and "bad" bitrate ids).
	inline uint GetBitrateKbps()const
	{
		uint nAudioVersion = GetAudioVersionId() & 1, nBitrateId = GetBitrateId();
		return s_mp3_bitrate_8000[ nAudioVersion ][ nBitrateId ] * 8;
	}

	// Checks that the header is similar. Padding differences are ignored.
	// This will not work with VBR encoding.
	inline bool IsSimilar(const Mp3FrameHeader & otherHeader) const
	{
		// TODO: Could be optimized. Although I doubt this is actually necessary.
		bool b0 = m_bits[0] == otherHeader.m_bits[0];
		bool b1 = m_bits[1] == otherHeader.m_bits[1];
		bool b2 = (m_bits[2] & 0xFD) == (otherHeader.m_bits[2] & 0xFD); // 0xFD masks out the padding bit
		bool b3 = m_bits[3] == otherHeader.m_bits[3];
		return b0 & b1 & b2 & b3;
	}

	// True if the two bytes at h start a V1 or V2, Layer 3 frame header.
	static bool IsCorrectHeader( const uint8 * h )
	{
		uint8 h0 = h[0], h1 = h[1];
		// must be 11111111 1111x01x for V1 or V2, Layer 3 header
		return ( h0 == 0xFF && ( h1 & 0xF6 ) == 0xF2 );
	}

	// Adjusts a computed frame length by looking for the next frame header at
	// this + nLength and, failing that, one byte before / after it. Returns the
	// adjusted length, or nLength unchanged if no header is found nearby or the
	// stream ends too close to the frame end to probe safely.
	inline uint CorrectFrameLength( uint nLength, const uint8 * pStreamEnd )const
	{
		const uint8 * pFrameEnd = ( ( const uint8 * ) this ) + nLength;
		// the furthest byte probed below is pFrameEnd + 2
		if( pStreamEnd >= pFrameEnd + 1 + 2 )
		{
			if( IsCorrectHeader( pFrameEnd ) )
				return nLength;
			// search distance is currently a single byte in each direction
			for( uint d = 1; d < 2; ++d )
			{
				if( IsCorrectHeader( pFrameEnd - d ) )
					return nLength - d;
				if( IsCorrectHeader( pFrameEnd + d ) )
					return nLength + d;
			}
		}
		return nLength;
	}

	// scan the byte stream to find the next header
	inline uint CorrectFrameLength( const uint8 * pStreamEnd)const
	{
		uint nLength = GetFrameLengthIncludingHeader();
		return CorrectFrameLength( nLength, pStreamEnd );
	}
};
namespace job_mp3dec
{
// Size constants of the mp3 decode job.
enum ConstEnum_t
{
MP3_FRAME_SAMPLE_COUNT = 0x480, // 1152 samples per frame
// we need space for stereo mp3 frame, 16 bits per sample,
// and then 127 bytes on each side of it for misalignment,
// and then 127 bytes more for misalignment of this whole buffer size
// for smoother output with less pointless copying, specify more local store when initializing the job descriptor
IOBUFFER_SIZE = ( MP3_FRAME_SAMPLE_COUNT * 2 * sizeof( int16 ) + 3 * 127 ) & -128 // "& -128" truncates the sum to a multiple of 128
};
// Decoder state kept between joblets: two CellMP3Context slots plus byte counters.
// NOTE(review): counter meanings are inferred from their names - confirm against the job code.
struct ALIGN16 Context_t
{
CellMP3Context m_context[2];
int32 m_nInternalMp3Count;
int32 m_nLastBytesRead;
int32 m_nLastBytesWritten;
uint32 m_nTotalBytesRead;
uint32 m_nTotalBytesWritten;
// Zero-fills the whole structure.
void Init();
}ALIGN16_POST;
// a joblet may not be "allocated"; if it's "not complete" AND "allocated", only then will it be processed
// it may be deallocated any time after it's complete; it may be appended any time after it's complete; it may not suddenly become incomplete
struct ALIGN16 Joblet_t
{
enum FlagEnum_t
{
FLAG_DEBUG_STOP = 0x1000,
FLAG_DEBUG_SPIN = 0x2000,
FLAG_DECODE_INVALID_FRAME = 0x4000,
FLAG_DECODE_INCOMPLETE_FRAME = 0x8000,
FLAG_DECODE_WAV_SCATTER = 0x400,
FLAG_DECODE_MP3_GATHER = 0x200,
FLAG_DECODE_INIT_CONTEXT = 0x100, // don't take the context from main memory, init it in local to zeroes and then DMA it out
FLAG_DECODE_ERROR = 0x80, // an error happened during decode; COMPLETE bits is still set, you may read the buffer ends
FLAG_DECODE_EMPTY = 0x40, // empty input or output stream
FLAG_DECODE_COMPLETE = 0x20, // decoding complete; you may read the buffer ends
FLAG_ALLOCATED = 0x10,
FLAG_LITTLE_ENDIAN = 0x08,
FLAG_FULL_MP3_FRAMES_ONLY = 4,
// input: means m_eaWav will accept mono
// output: means m_eaWav points to mono samples
FLAG_MONO = 1,
// input: means m_eaWav will accept stereo
// output: means m_eaWav points to pairs of samples "left, right"
FLAG_STEREO = 2,
FLAGS_MONO_OR_STEREO = FLAG_MONO | FLAG_STEREO
};
uint32 m_nFlags;
uint32 m_nSkipSamples;
// input: max number of mp3 frames to decode;
// output: number of frames decoded
//uint32 m_nMp3Frames;
// input buffer
uint8 * m_eaMp3;
// output: the last decoded frame, for warming up
uint8 * m_eaMp3Last;
// output: the end of the buffer that has been read
uint8 *m_eaMp3Get;
// input: the end of the buffer allowed to read
uint8 * m_eaMp3End;
// output buffer
int16 * m_eaWave;
// output: the end of the buffer written to
int16 *m_eaWavePut;
// intput: the end of the buffer allocated for writing
int16 * m_eaWaveEnd;
// In/Out - 2 contexts
Context_t *m_eaContext;
bool NeedDecode()const { return ( m_nFlags & ( FLAG_DECODE_ERROR | FLAG_DECODE_COMPLETE | FLAG_ALLOCATED ) ) == FLAG_ALLOCATED; }
bool IsAllocated()const{ return (m_nFlags & FLAG_ALLOCATED) != 0; }
bool IsComplete()const { return (m_nFlags & FLAG_DECODE_COMPLETE) != 0; }
bool HasDecodingError()const { return (m_nFlags & FLAG_DECODE_ERROR) != 0; }
} ALIGN16_POST;
// the SPURS job descriptor flavour used by this job; GetJobParams() extracts JobParams_t from it
typedef CellSpursJob128 JobDescriptor_t;
// Shared parameters of the decode job: the joblet array plus the get/put indices and worker
// bookkeeping that several SPU workers and the PPU operate on.
struct ALIGN16 JobParams_t
{
void *m_eaDecoder;
Joblet_t *m_eaJoblets;
// joblet get index;
// SPU: volatile makes no sense because it's only operated on in LS; it's changed by multiple SPUs, though
// PPU: m_nGet can change at any time by SPU
PPU_ONLY( volatile ) uint32 m_nGet;
// how many jobs are started and didn't decide to quit yet.
// when this is down to 0 AFTER we advanced our m_nPut (don't forget the barrier!), we have to spawn more jobs
PPU_ONLY( volatile ) uint32 m_nWorkers;
// joblet put index ;
// SPU: volatile makes no sense because it's only operated on in LS
// PPU: volatile makes no sense because PPU uses atomics to access it, and SPU never changes this value
uint32 m_nPut;
//uint32 m_nJobletIndexMask;
enum ConstEnum_t {JOBLET_COUNT = 64 * 4 }; // Each sound uses 4 joblets at a time. Can decode 64 sounds in a row.
// NOTE(review): presumably a debugging aid that asks a worker to break into the debugger - confirm in the SPU job
uint32 m_nDebuggerBreak;
uint32 m_nWorkerTotal; // total workers that ever existed (not including m_nWorkers, which are the currently started up workers)
uint32 m_nJobletsAcquired;
uint32 m_nWorkerLock; // min workers to hold on to
}ALIGN16_POST;
// Returns the JobParams_t associated with the job descriptor at pJob
// (thin typed wrapper over VjobGetJobParams for this job's descriptor type).
inline JobParams_t * GetJobParams( void *pJob )
{
	JobParams_t * const pParams = VjobGetJobParams< JobParams_t, JobDescriptor_t >( pJob );
	return pParams;
}
}
#endif

308
public/vjobs/pcring.h Normal file
View File

@ -0,0 +1,308 @@
//========== Copyright (c) Valve Corporation, All rights reserved. ========
//
// Producer-consumer FIFO ring buffers
// These are shared between SPU and PPU, and define some common misc functions
//
//
#ifndef VJOBS_PCRING_HDR
#define VJOBS_PCRING_HDR
#include "ps3/ps3_gcm_config.h"
#include "ps3/spu_job_shared.h"
// A 16-byte-aligned, 4-word GCM command pair that writes a value to a label (semaphore):
//   word 0: method "set semaphore offset"
//   word 1: semaphore offset
//   word 2: method "texture read semaphore release"
//   word 3: semaphore value
struct ALIGN16 SetLabelAlignedCommand_t
{
	// the same 16 bytes viewed either as four words or as one SPU vector
	union CmdUnion_t
	{
		uint32 m_nCmd[4];
		vector unsigned int m_vuCmd;
	};
	CmdUnion_t m_cmd;

	// Fills all four command words so that label nIndex receives nValue.
	void SetWriteTextureLabel( uint nIndex, uint nValue )
	{
		// each label occupies 0x10 bytes
		uint nLabelOffset = nIndex * 0x10;
#ifdef SPU
		// SPU: assemble the whole command as a single vector store
		m_cmd.m_vuCmd = ( vector unsigned int )
		{
			CELL_GCM_METHOD(CELL_GCM_NV4097_SET_SEMAPHORE_OFFSET, 1),
			(nLabelOffset),
			CELL_GCM_METHOD(CELL_GCM_NV4097_TEXTURE_READ_SEMAPHORE_RELEASE, 1),
			(nValue)
		};
#else
		// PPU: let the GCM macros emit the words one after another
		uint32 * pWrite = m_cmd.m_nCmd;
		CELL_GCM_METHOD_SET_SEMAPHORE_OFFSET(pWrite, nLabelOffset);
		CELL_GCM_METHOD_TEXTURE_READ_SEMAPHORE_RELEASE(pWrite, nValue);
#endif
	}

	// Replaces only the value word (word 3), leaving the rest of the command intact.
	void UpdateWriteTextureLabel( uint nValue )
	{
		m_cmd.m_nCmd[3] = nValue;
	}

	// Reads back the value word (word 3).
	uint32 GetWriteTextureLabel( )
	{
		return m_cmd.m_nCmd[3];
	}
}
ALIGN16_POST;
// read-only part of pcring
struct ALIGN16 PcRingRo_t
{
SetLabelAlignedCommand_t m_head[2]; // the variants of the head sync
void Init( uint nLabel );
};
//
// Producer-consumer FIFO ring buffer for the command buffer.
// Kept in main memory, controlled/produced by SPU, consumed by RSX.
//
// m_nPut and m_nEnd hold byte offsets into the buffer in their low 31 bits; the top bit (ODD_BIT)
// is a ring parity flag that is inverted every time the pointer wraps (see Wrap()).
// A "signal" is a value produced by GetSignal() and later fed back through NotifySignal*(),
// where it becomes the new m_nEnd.
//
class ALIGN16 SysFifo
{
public:
	uint32 m_eaBuffer; // the buffer begin, EA
	uint32 m_nSize; // the whole buffer size
	// the important thing here is that put and end pointers are independent
	// and can be updated from different threads lock-free, wait-free
	// we're putting into Put segment; we can increment it until we hit "end", at which point we need to wait for RSX to eat up and move the "End" forward
	uint32 m_nPut, m_nEnd; // high bit means odd-even ring
	enum {ODD_BIT = 0x80000000};

	// Binds the FIFO to a buffer. nPut is the initial put offset relative to eaBuffer.
	void Init( uintp eaBuffer, uint nSize, uint nPut = 0 )
	{
		// put may be anywhere (it must be the GCM control register PUT, relatively to eaBuffer), but it must be aligned and within the buffer
		Assert( nPut < nSize && !( 0xF & nPut ) );
		m_eaBuffer = ( uint32 )eaBuffer;
		m_nSize = nSize;
		m_nPut = nPut;
		m_nEnd = ODD_BIT;
	}

	// Resets put to the start of the even ring and end to the start of the odd ring.
	void HardReset()
	{
		m_nPut = 0;
		m_nEnd = ODD_BIT;
	}

	// must wrap before put?
	bool MustWrap( uint nPutBytes ) const
	{
		return ( ( m_nPut + nPutBytes ) & ~ODD_BIT ) > m_nSize;
	}

	// Compares two signals, taking the ring parity bit into account.
	bool IsOrdered( uint nSignal0, uint nSignal1 )
	{
		if( ( nSignal0 ^ nSignal1 ) & ODD_BIT )
		{
			return ( nSignal1 & ~ODD_BIT ) <= ( nSignal0 & ~ODD_BIT );
		}
		else
		{
			return nSignal0 <= nSignal1;
		}
	}

	// Is there room for nPutBytes at the current put position? Must not be called when MustWrap( nPutBytes ).
	bool CanPutNoWrap( uint nPutBytes ) const
	{
		Assert( !MustWrap( nPutBytes ) );
		if ( ( m_nPut ^ m_nEnd ) & ODD_BIT ) // bits are different => we have enough space till the end of the buffer
		{
			Assert( ( m_nPut | ODD_BIT ) >= ( m_nEnd | ODD_BIT ) ); // the End must be trailing behind Put , only in the NEXT ring
			return true;
		}
		else
		{
			// bits are the same => we have continuous unsigned range between put, put+add, end
			// we don't want to put commands up to m_nEnd because theoretically we can get to the situation when put==get and RSX will skip the whole SYSring
			return ( m_nPut + nPutBytes < m_nEnd );
		}
	}

	// Would nPutBytes fit at the start of the buffer if we wrapped right now?
	bool CanWrapAndPut( uint nPutBytes )const
	{
		if ( ( m_nPut ^ m_nEnd ) & ODD_BIT ) // to wrap, "end" must be in the next ring
		{
			// Important: Assume that we'll reset Put to 0 when we put ... and "add" must be before "end"
			// we don't want to put commands up to m_nEnd because theoretically we can get to the situation when put==get and RSX will skip the whole SYSring
			return ( nPutBytes < ( m_nEnd & ~ODD_BIT ) );
		}
		else
		{
			Assert( m_nPut <= m_nEnd ); // the End must be in front of Put, since it's in the same ring
			return false;
		}
	}

	// Moves put to offset 0 of the next ring.
	void Wrap( )
	{
		Assert( ( m_nPut ^ m_nEnd ) & ODD_BIT );
		m_nPut = ( ~m_nPut ) & ODD_BIT; // begin from the start, only in the next ring (invert odd/even)
	}

	// prepare to Put(nBytes); wrap if necessary; don't do anything unless subsequent Put(nBytes) is valid
	enum PreparePutEnum_t
	{
		PUT_PREPARED_WRAPPED,
		PUT_PREPARED_NOWRAP,
		PUT_PREPARE_FAILED
	};
	PreparePutEnum_t PreparePut( uint nBytes )
	{
		if( MustWrap( nBytes ) )
		{
			if( CanWrapAndPut( nBytes ) )
			{
				Wrap();
				Assert( CanPutNoWrap( nBytes ) );
				return PUT_PREPARED_WRAPPED;
			}
		}
		else
		{
			if( CanPutNoWrap( nBytes ) )
			{
				return PUT_PREPARED_NOWRAP;
			}
		}
		return PUT_PREPARE_FAILED;
	}

	// NOTE: the guarantee of this function is that multiple Puts are additive: Put(100) is equivalent to Put(25),Put(75) and such
	void Put( uint nPutBytes )
	{
		Assert( CanPutNoWrap( nPutBytes ) );
		m_nPut += nPutBytes;
	}

	// EA of the current put position
	uint EaPut( )const
	{
		return m_eaBuffer + ( m_nPut & ~ODD_BIT );
	}

	// EA corresponding to an arbitrary put value (parity bit is ignored)
	uint PutToEa( uint nPut )const
	{
		return m_eaBuffer + ( nPut & ~ODD_BIT );
	}

	uint EaWrapAndPut()const // EA of PUT after Wrap() is executed
	{
		return m_eaBuffer + sizeof( SetLabelAlignedCommand_t );
	}

	// how much memory is left in this ring, without Wrapping?
	int GetNoWrapCapacity()const
	{
		return m_nSize - ( m_nPut & ~ODD_BIT );
	}

	// returns a value that will signal that the buffer has been processed to m_nPut pointer
	uint GetSignal()const
	{
		return m_nPut ^ ODD_BIT;
	}

	// FFFFFFFF would imply a 2-Gb buffer, which we clearly can't have on PS3
	static uint GetInvalidSignal() { return 0xFFFFFFFF; }

	// head command matching the parity of the current ring
	const SetLabelAlignedCommand_t * GetHead( PcRingRo_t &ro )const
	{
		return &ro.m_head[m_nPut >> 31];
	}

	// head command matching the parity of the ring we would enter after Wrap()
	const SetLabelAlignedCommand_t * GetNextHead( PcRingRo_t &ro )const
	{
		return &ro.m_head[( ~m_nPut ) >> 31];
	}

	// notify about a signal coming in asynchronously , must be a result of GetSignal() after Put()
	void NotifySignal( uint nSignal )
	{
		AssertSpuMsg( ( nSignal & ~ODD_BIT ) <= m_nSize, "{ea=0x%X,size=0x%X,put=0x%X,end=0x%X}.NotifySignal(0x%X)\n", m_eaBuffer, m_nSize, m_nPut, m_nEnd, nSignal );
		if( SPUGCM_ENABLE_NOTIFY_RSX_GET )
		{
			// we can artificially set the signal ahead sometimes, because we have 2 streams of signals from RSX :
			// The control register GET and the cmd buffer label (GCM_LABEL_SYSRING_SIGNAL)
			// so we'll filter extra signals here: we may NOT step back
			if( !( ( nSignal ^ m_nEnd ) & ODD_BIT ) && nSignal < m_nEnd )
			{
				return;// skip this: signal and end are in the same ring and signal is earlier than end
			}
		}
		AssertSpuMsg( ( ( nSignal ^ m_nPut ) & ODD_BIT ) ?
			( nSignal & ~ODD_BIT ) <= ( m_nPut & ~ODD_BIT )// signal and put are in different rings
			:
			nSignal >= m_nPut, // signal and put are in the same ring
			"{ea=0x%X,size=0x%X,put=0x%X,end=0x%X}.NotifySignal(0x%X)\n",
			m_eaBuffer, m_nSize, m_nPut, m_nEnd, nSignal
			);
		m_nEnd = nSignal;
	}

	// NotifySignal() version that can tolerate outdated signals due to different latencies between SPU and RSX
	void NotifySignalSafe( uint nSignal )
	{
		if( ( ( nSignal ^ m_nPut ) & ODD_BIT ) ?
			( nSignal & ~ODD_BIT ) <= ( m_nPut & ~ODD_BIT )// signal and put are in different rings
			:
			nSignal >= m_nPut ) // signal and put are in the same ring
		{
			m_nEnd = nSignal;
		}
	}

	// true when nSignal differs from the currently recorded end
	bool IsSignalDifferent( uint nSignal )
	{
		return m_nEnd != nSignal;
	}

	// WARNING this is a debug-only function. Do not use for anything but debugging, because it's slow
	// and because it will signal "finished" incorrectly when the whole ring is full
	// Expects RSX get relative to the base of the buffer (i.e. 0 when Get == the byte 0 of this PCring)
	bool NotifyRsxGet( uint nRsxControlRegisterGet )
	{
		if( nRsxControlRegisterGet == ( m_nPut & ~ODD_BIT ) )
		{
			m_nEnd = m_nPut ^ ODD_BIT; // assume this means we've processed all SYSRING buffer
			return true;
		}
		else
		{
			return false;
		}
	}

	// true when the RSX get offset (relative to the buffer base) has reached the put offset
	bool IsRsxFinished( uint nRsxControlRegisterGet )
	{
		return nRsxControlRegisterGet == ( m_nPut & ~ODD_BIT );
	}

	// True when the recorded end equals the signal for the current put position,
	// i.e. everything put so far has been signalled as processed.
	bool IsDone()const
	{
		// Compare against GetSignal() explicitly: `==` binds tighter than `^`, so the unparenthesized
		// form `m_nEnd == m_nPut ^ ODD_BIT` evaluated as `( m_nEnd == m_nPut ) ^ ODD_BIT`,
		// which is always non-zero and therefore always reported "done".
		return m_nEnd == GetSignal();
	}
}
ALIGN16_POST;
// Pre-builds both head-sync commands for label nLabel.
// Each head writes the signal value that corresponds to "put == one head command into the ring"
// for its ring parity (compare SysFifo::GetSignal(), which is m_nPut ^ ODD_BIT).
inline void PcRingRo_t::Init( uint nLabel )
{
	const uint nHeadBytes = sizeof( SetLabelAlignedCommand_t );

	// ring with m_nPut parity 0 -> signal carries ODD_BIT
	m_head[0].SetWriteTextureLabel( nLabel, SysFifo::ODD_BIT | nHeadBytes );

	// ring with m_nPut parity ODD_BIT -> signal has ODD_BIT cleared
	m_head[1].SetWriteTextureLabel( nLabel, nHeadBytes );
}
#endif

66
public/vjobs/root.h Normal file
View File

@ -0,0 +1,66 @@
//========== Copyright (c) Valve Corporation, All rights reserved. ========
#ifndef JOB_VJOBS_ROOT_HDR
#define JOB_VJOBS_ROOT_HDR
#ifdef _PS3
#include <cell/spurs.h>
#include "vjobs/edgegeomparams_shared.h"
#include "const.h"
#endif
// this structure gives the client a kind of "root access" to SPURS and all VJobs functionality
// It aggregates the SPURS instance, the job queues and their ports, workload priorities and
// pointers to the job headers of every job. All of that exists only in _PS3 builds; on other
// platforms the struct has no data members, only the two enums.
struct ALIGN128 VJobsRoot
{
enum AlignmentEnum_t {ALIGNMENT = 128};
// size of the m_queuePortAnim array below
enum {MAXPORTS_ANIM = 32};
#ifdef _PS3
cell::Spurs::Spurs m_spurs;
// the job queue processes a lot of Edge jobs, and edge jobs have the largest descriptors of all
cell::Spurs::JobQueue::JobQueue< 512, 256 > m_largeJobQueue;
cell::Spurs::JobQueue::JobQueue< 512, sizeof( job_edgegeom::JobDescriptor_t ) > m_smallJobQueue;
cell::Spurs::JobQueue::JobQueue< 512, sizeof( job_edgegeom::JobDescriptor_t ) > m_buildWorldRenderableJobQueue;
// job queue ports, grouped by client system
// NOTE(review): which queue each port is attached to is decided by init code not visible here
JobQueue::Port2 m_queuePortBlobulator, m_queuePortSound;//DECL_ALIGN( CELL_SPURS_JOBQUEUE_PORT2_ALIGN );
JobQueue::Port2 m_queuePortAnim[ MAXPORTS_ANIM ];
JobQueue::Port2 m_queuePortBuildIndices;
JobQueue::Port2 m_queuePortBuildWorldAndRenderables;
JobQueue::Port2 m_queuePortBuildWorld[ MAX_CONCURRENT_BUILDVIEWS ];
JobQueue::Port2 m_queuePortBuildRenderables[ MAX_CONCURRENT_BUILDVIEWS ];
// workload / job chain priorities
// NOTE(review): presumably packed per-SPU SPURS priorities (8 bytes, one per SPU) - confirm against the SPURS setup code
uint64 m_nSpugcmChainPriority;
uint64 m_nEdgeChainPriority;
uint64 m_nFpcpChainPriority;
uint64 m_nSystemWorkloadPriority;
uint64 m_nSlimJobQueuePriority;
uint64 m_nBulkJobQueuePriority;
uint64 m_nEdgePostWorkloadPriority;
uint64 m_nGemWorkloadPriority;
// job headers, one per job type
const CellSpursJobHeader *m_pFpcPatch2;
const CellSpursJobHeader *m_pJobNotify;
const CellSpursJobHeader *m_pJobZPass;
const CellSpursJobHeader *m_pCtxFlush;
const CellSpursJobHeader *m_pGcmStateFlush;
const CellSpursJobHeader *m_pEdgeGeom;
const CellSpursJobHeader *m_pDrawIndexedPrimitive;
const CellSpursJobHeader *m_pJobBlobulator;
const CellSpursJobHeader *m_pJobSndUpsampler;
const CellSpursJobHeader *m_pJobMp3Dec;
const CellSpursJobHeader *m_pJobZlibInflate;
const CellSpursJobHeader *m_pJobZlibDeflate;
const CellSpursJobHeader *m_pJobAccumPose;
const CellSpursJobHeader *m_pJobBuildIndices;
const CellSpursJobHeader *m_pJobBuildRenderables;
const CellSpursJobHeader *m_pJobBuildWorldLists;
#endif
}
ALIGN128_POST;
#endif

View File

@ -0,0 +1,135 @@
//========== Copyright (c) Valve Corporation, All rights reserved. ========
#if !defined( JOB_SNDUPSAMPLER_SHARED_HDR ) && defined( _PS3 )
#define JOB_SNDUPSAMPLER_SHARED_HDR
#include "ps3/spu_job_shared.h"
// the PS3 audio buffer can have either 8 or 16 blocks of 256 samples each.
// The only sample frequency allowed is 48khz, so that's around 23 or 46 milliseconds.
#define CELLAUDIO_PORT_BUFFER_BLOCKS 16
#define SURROUND_HEADPHONES 0
#define SURROUND_STEREO 2
#define SURROUND_DIGITAL5DOT1 5
#define SURROUND_DIGITAL7DOT1 7
// 7.1 means there are a max of 8 channels
#define MAX_DEVICE_CHANNELS 8
/// the libaudio buffers are simply large arrays of float samples.
/// there are only two configurations: two-channel and eight-channel.
/*
* This is disabled as now we are only outputting surround.
* The stereo is pushed in the left and right of the surround structure.
*
struct libaudio_sample_stereo_t
{
float left;
float right;
};
*/
// One eight-channel output sample: eight consecutive floats, one per speaker channel.
// Stereo output reuses this layout and only drives `left` and `right`.
struct libaudio_sample_surround_t
{
float left;
float right;
float center;
float subwoofer;
float leftsurround;
float rightsurround;
float leftextend;
float rightextend;
};
namespace job_sndupsampler
{
typedef CellSpursJob256 JobDescriptor_t;
enum ConstEnum_t
{
INPUTFREQ = 44100,
OUTPUTFREQ = 48000,
BUFFERSIZE = 256 * 16, // < input samples, should be a power of two
OUTBUFFERSAMPLES
};
struct BlockTrace_t
{
float m_fractionalSamplesBefore;
uint32 m_n44kSamplesConsumed;
};
struct ALIGN16 JobOutput_t
{
uint32 m_nBlockWritten;
float m_fractionalSamples;
int32 m_ringHead;
int32 m_n44kSamplesConsumed;
BlockTrace_t m_trace[0x10];
}
ALIGN16_POST;
/// ring buffers: one interleaved pair for stereo, or five channels for surround.
/// I compress them back to signed shorts to conserve memory bandwidth.
struct stereosample_t
{
int16 left, right;
};
struct surroundsample_t
{
int16 left, right, center, surleft, surright;
};
struct JobParams_t
{
sys_addr_t m_eaPortBufferAddr; // may change when user switches surround <-> stereo
int32 m_availableInputBlocks;
int32 m_nextBlockIdxToWrite;
float m_volumeFactor;
float m_fractionalSamples;
int32 m_ringHead;
int32 m_ringCount;
float m_flMaxSumStereo;
float m_flBackChannelMultipler;
uint8 m_eaInputSamplesBegin0xF;
uint8 m_nDebuggerBreak;
int8 m_deviceChannels;
int8 m_nCellAudioBlockSamplesLog2;
int8 m_bIsSurround;
public:
inline float *GetEffectiveAddressForBlockIdx( unsigned nBlock )const
{
nBlock %= CELLAUDIO_PORT_BUFFER_BLOCKS;
return reinterpret_cast<float *>( m_eaPortBufferAddr + ( ( nBlock * sizeof(float) * m_deviceChannels ) << m_nCellAudioBlockSamplesLog2 ) );
}
inline bool IsSurround() const
{
return m_bIsSurround;
}
inline int OutputSamplesAvailable()
{
// returns the number of output samples we can generate (notice this rounds down, ie we may leave some fractional samples for next time)
return ( ( m_ringCount - 1 ) * OUTPUTFREQ ) / INPUTFREQ;
}
};
inline JobParams_t * GetJobParams( void *pJob )
{
return VjobGetJobParams< JobParams_t, JobDescriptor_t >( pJob );
}
}
#endif

View File

@ -0,0 +1,258 @@
//========== Copyright (c) Valve Corporation, All rights reserved. ========
// the constants and declarations needed for the SPU draw queue
//
#ifndef VJOBS_SPUDRAWQUEUE_SHARED_HDR
#define VJOBS_SPUDRAWQUEUE_SHARED_HDR
#include "ps3/ps3_gcm_config.h"
#include "ps3/ps3_gcm_shared.h"
#include "ps3/dxabstract_gcm_shared.h"
// Command words of the SPU draw queue.
// The top byte of a command word (SPUDRAWQUEUE_METHOD_MASK) selects the method; each *_METHOD
// constant is that top-byte value. Where a method has a matching *_MASK constant, the mask
// presumably covers the low bits that carry that method's inline payload - confirm in the
// queue interpreter, which is not part of this header.
enum SpuDrawQueueEnum_t
{
SPUDRAWQUEUE_NOP = 0,
SPUDRAWQUEUE_METHOD_MASK = 0xFF000000,
SPUDRAWQUEUE_NOPCOUNT_MASK = 0x0000FFFF,
SPUDRAWQUEUE_NOPCOUNT_METHOD = 0x01000000,
SPUDRAWQUEUE_SETRENDERSTATE_MASK = 0x000003FF,
SPUDRAWQUEUE_SETRENDERSTATE_METHOD = 0x02000000,
SPUDRAWQUEUE_SETSAMPLERSTATE_MASK = 0x000000FF, // 4 bits for Sampler, 4 bits for Type
SPUDRAWQUEUE_SETSAMPLERSTATE_METHOD = 0x03000000,
SPUDRAWQUEUE_SETVIEWPORT_MASK = 0x0000FFFF,
SPUDRAWQUEUE_SETVIEWPORT_METHOD = 0x04000000,
SPUDRAWQUEUE_SETTEXTURE_MASK = 0x000000FF,
SPUDRAWQUEUE_SETTEXTURE_METHOD = 0x05000000,
SPUDRAWQUEUE_RESETTEXTURE_MASK = 0x000000FF,
SPUDRAWQUEUE_RESETTEXTURE_METHOD = 0x06000000,
SPUDRAWQUEUE_RESETSURFACETOKNOWNDEFAULTSTATE_METHOD = 0x07000000,
SPUDRAWQUEUE_SETPIXELSHADER_METHOD = 0x08000000,
SPUDRAWQUEUE_SETVERTEXSHADER_METHOD = 0x09000000,
SPUDRAWQUEUE_SETVERTEXSHADERCONSTANT_B_MASK = 0x0000FFFF,
SPUDRAWQUEUE_SETVERTEXSHADERCONSTANT_B_METHOD = 0x0A000000,
SPUDRAWQUEUE_UNSETVERTEXSTREAMSOURCE_MASK = 0x0000000F,
SPUDRAWQUEUE_UNSETVERTEXSTREAMSOURCE_METHOD = 0x0B000000,
SPUDRAWQUEUE_SETVERTEXSTREAMSOURCE_MASK = 0x0000000F,
SPUDRAWQUEUE_SETVERTEXSTREAMSOURCE_METHOD = 0x0C000000,
SPUDRAWQUEUE_SETSAMPLERSTATEPART1_MASK = 0x0000000F,
SPUDRAWQUEUE_SETSAMPLERSTATEPART1_METHOD = 0x0D000000,
SPUDRAWQUEUE_SETSCISSORRECT_METHOD = 0x0E000000,
SPUDRAWQUEUE_UPDATESURFACE_METHOD = 0x0F000000,
SPUDRAWQUEUE_UPDATESURFACE_MASK = 0x0000FFFF,
SPUDRAWQUEUE_DRAWINDEXEDPRIMITIVE_METHOD = 0x10000000, // unused!
SPUDRAWQUEUE_CLEAR_METHOD = 0x11000000,
SPUDRAWQUEUE_CLEAR_MASK = D3DCLEAR_STENCIL | D3DCLEAR_ZBUFFER | D3DCLEAR_TARGET,
SPUDRAWQUEUE_SETVERTEXSHADERCONSTANT_F_MASK = 0x00FFFFFF,
SPUDRAWQUEUE_SETVERTEXSHADERCONSTANT_F_METHOD = 0x12000000,
// note: method values 0x13000000 and 0x14000000 are not assigned in this enum
SPUDRAWQUEUE_VERTEXBUFFERNEWOFFSET_METHOD = 0x15000000,
SPUDRAWQUEUE_BEGINZCULLREPORT_METHOD = 0x16000000,
SPUDRAWQUEUE_ENDZCULLREPORT_METHOD = 0x17000000,
SPUDRAWQUEUE_GCMCOMMANDS_METHOD = 0x18000000,
SPUDRAWQUEUE_GCMCOMMANDS_MASK = 0x00FFFFFF, // word count
SPUDRAWQUEUE_BEGINZPREPASS_METHOD = 0x19000000,
SPUDRAWQUEUE_BEGINZPREPASS_MASK = 0x00000FFF,
SPUDRAWQUEUE_PREDICATION_METHOD = 0x1A000000,
SPUDRAWQUEUE_PREDICATION_MASK = 0x00FFFFFF,
SPUDRAWQUEUE_ENDZPREPASS_METHOD = 0x1B000000,
SPUDRAWQUEUE_ENDZPREPASS_MASK = 0x00FFFFFF,
SPUDRAWQUEUE_ENDZPOSTPASS_METHOD = 0x1C000000,
SPUDRAWQUEUE_ENDZPOSTPASS_MASK = 0x0000FFFF,
SPUDRAWQUEUE_TRANSFER_METHOD = 0x1D000000,
SPUDRAWQUEUE_TRANSFER_MASK = 0x00000003, // transfer mode
SPUDRAWQUEUE_RELOAD_ZCULL_METHOD = 0x1E000000,
SPUDRAWQUEUE_RELOAD_ZCULL_MASK = 0x000000FF,
SPUDRAWQUEUE_FLUSH_FPCP_JOURNAL = 0x1F000000,
SPUDRAWQUEUE_FRAMEEVENT_METHOD = 0x20000000,
SPUDRAWQUEUE_FRAMEEVENT_MASK = 0x00000001,
//SPUDRAWQUEUE_SET_FP_CONSTS_METHOD = 0x21000000,
//SPUDRAWQUEUE_SET_FP_CONSTS_MASK = 0x0000FFFF,
SPUDRAWQUEUE_QUEUE_RSX_INTERRUPT_METHOD = 0x21000000,
SPUDRAWQUEUE_QUEUE_RSX_INTERRUPT_MASK = 0x000000FF,
SPUDRAWQUEUE_PERF_MARKER_METHOD = 0x22000000,
SPUDRAWQUEUE_PERF_MARKER_MASK = 0x00000FFF,
// predefined perf markers: the marker method with a fixed marker id in the low bits
SPUDRAWQUEUE_PERF_MARKER_AAReplay = SPUDRAWQUEUE_PERF_MARKER_METHOD | 1,
SPUDRAWQUEUE_PERF_MARKER_AAReplayEnd = SPUDRAWQUEUE_PERF_MARKER_METHOD | 2,
SPUDRAWQUEUE_PERF_MARKER_DrawNormal = SPUDRAWQUEUE_PERF_MARKER_METHOD | 3,
SPUDRAWQUEUE_PERF_MARKER_DrawDeferred = SPUDRAWQUEUE_PERF_MARKER_METHOD | 4,
SPUDRAWQUEUE_DEFER_STATE = 0x23000000,
SPUDRAWQUEUE_UNDEFER_STATE = 0x24000000,
SPUDRAWQUEUE_DRAW_INLINE_METHOD = 0x30000000,
SPUDRAWQUEUE_DRAW_INLINE_MASK = 0x00FFFFFF,
SPUDRAWQUEUE_DEBUGRECTANGLE_METHOD = 0x31000000,
SPUDRAWQUEUE_FLUSHTEXTURECACHE_METHOD = 0x32000000,
SPUDRAWQUEUE_RESETRSXSTATE_METHOD = 0x33000000,
// Deferred queue commands, interpreted on PPU and never getting to SPU job_gcmflush
// they have the same format, except they're followed by 1 or 2 words of extra data ( SPUDRAWQUEUE_DEFERRED_HEADER_WORDS - 1 )
SPUDRAWQUEUE_DEFERRED_METHOD_MASK = SPUDRAWQUEUE_METHOD_MASK,
SPUDRAWQUEUE_DEFERRED_GCMFLUSH_METHOD = 0x40000000,
SPUDRAWQUEUE_DEFERRED_GCMFLUSH_MASK = 0x00000001,
SPUDRAWQUEUE_DEFERRED_GCMFLUSH_DRAW_METHOD = 0x40000001, // this flag means "flush this chunk only when really drawing the queue". Isn't executed when simply updating state (when the queue is recorded to be deferred, it still needs to be semi-executed to update the GCM state)
SPUDRAWQUEUE_DEFERRED_DRAW_METHOD = 0x41000000,
SPUDRAWQUEUE_DEFERRED_SET_FP_CONST_METHOD = 0x42000000,
SPUDRAWQUEUE_DEFERRED_SET_FP_CONST_MASK = 0x00FFFFFF,
SPUDRAWQUEUE_DEFERRED_HEADER_WORDS = 2 // set this to 3 to fill more debug data in
};
// Debug-only sanity check: does nCmd look like the header word of a deferred queue command?
// this function must only be used in debugging
inline bool IsValidDeferredHeader( uint nCmd )
{
	const uint nMethod = nCmd & SPUDRAWQUEUE_METHOD_MASK;

	if( nMethod == SPUDRAWQUEUE_DEFERRED_GCMFLUSH_METHOD )
	{
		// only the bits of the GCMFLUSH mask may be set besides the method itself
		return ( nCmd & ~SPUDRAWQUEUE_DEFERRED_GCMFLUSH_MASK ) == SPUDRAWQUEUE_DEFERRED_GCMFLUSH_METHOD;
	}

	if( nMethod == SPUDRAWQUEUE_DEFERRED_DRAW_METHOD )
	{
		// the draw header carries no payload bits at all
		return nCmd == SPUDRAWQUEUE_DEFERRED_DRAW_METHOD;
	}

	// any payload is accepted for SET_FP_CONST; every other method is not a deferred header
	return nMethod == SPUDRAWQUEUE_DEFERRED_SET_FP_CONST_METHOD;
}
// Frame event ids; both values fit in SPUDRAWQUEUE_FRAMEEVENT_MASK (0x00000001).
// NOTE(review): presumably the payload of a SPUDRAWQUEUE_FRAMEEVENT_METHOD command - confirm at the call sites.
enum SpuDrawQueueFrameEventEnum_t
{
SDQFE_BEGIN_FRAME = 0,
SDQFE_END_FRAME = 1
};
struct SpuGcmEdgeGeomParams_t;
namespace OptimizedModel
{
struct OptimizedIndexBufferMarkupPs3_t;
}
// 16-byte-aligned header describing one draw call in the SPU draw queue.
// NOTE(review): field semantics below are taken from the names and existing comments only;
// the producer/consumer code is not part of this header.
struct ALIGN16 SpuDrawHeader_t
{
//uint32 m_nCookie0;
uint16 m_dirtyCachesMask;
// holds a D3DPRIMITIVETYPE value narrowed to 16 bits
/*D3DPRIMITIVETYPE*/uint16 m_nType;
int32 m_nBaseVertexIndex;
uint32 m_nMinVertexIndex;
uint32 m_numVertices;
uint32 m_nStartIndex;
uint32 m_nPrimCount;
uint32 m_nLocalOffsetIndexBuffer;
uint32 m_nPs3texFormatCount;
uint32 m_nFpcpEndOfJournalIdx;
uint32 m_nUsefulCmdBytes;
uint32 m_nPcbringBegin;
uint32 m_nResultantSpuDrawGet; // LSB is the index into the m_nSpuDrawGet[]
uint32 m_nEdgeDebugFlags;
//uint32 m_nSizeofPcbUploaded; // the number of useful bytes uploaded from PCBring
//SpuGcmEdgeGeomParams_t *m_eaEdgeGeomParams; <- this is passed in job descriptor to job_edgegeom
//uint32 m_numEdgeGeomVertices;
//uint32 m_numEdgeIoBufferSize;
uint32 m_eaEdgeDmaInputBase;
OptimizedModel::OptimizedIndexBufferMarkupPs3_t *m_eaIbMarkup;
uint32 m_nIbMarkupPartitions;
uint32 m_nDrawIndexedPrimitives;
//uint32 m_nCookie1;
}
ALIGN16_POST;
// Scissor rectangle as four 16-bit values: origin (x, y) and extent (w, h).
// NOTE(review): presumably the payload of SPUDRAWQUEUE_SETSCISSORRECT_METHOD - confirm at the call sites.
struct SpuDrawScissor_t
{
uint16 x, y, w, h;
};
// Clear values for a device clear: color, depth and stencil, plus the bit depth of the depth/stencil surface.
// NOTE(review): presumably the payload of SPUDRAWQUEUE_CLEAR_METHOD - confirm at the call sites.
struct SpuDrawDeviceClear_t
{
D3DCOLOR m_nColor;
float m_flZ;
uint32 m_nStencil;
uint32 m_nDepthStencilBitDepth;
};
// A colored rectangle: color plus origin (x, y) and extent (w, h).
// NOTE(review): presumably the payload of SPUDRAWQUEUE_DEBUGRECTANGLE_METHOD - confirm at the call sites.
struct SpuDrawDebugRectangle_t
{
D3DCOLOR color;
uint16_t x,y,w,h;
};
// Parameters of a transfer: a line size and an old/new offset pair.
// NOTE(review): presumably the payload of SPUDRAWQUEUE_TRANSFER_METHOD; units of the fields are not visible here.
struct SpuDrawTransfer_t
{
uint32 m_nLineSize;
uint32 m_nOldOffset;
uint32 m_nNewOffset;
};
// Texture data for a surface update: two CPs3gcmTextureData_t entries, m_texC and m_texZ.
// NOTE(review): presumably color and depth targets, and the payload of SPUDRAWQUEUE_UPDATESURFACE_METHOD - confirm.
struct SpuUpdateSurface_t
{
// if the scissor is logically disabled, set scissor to this size
//uint16 m_nRenderTargetWidth, m_nRenderTargetHeight;
CPs3gcmTextureData_t m_texC, m_texZ;
};
// A sampler description (part 1) together with the texture data it applies to.
// NOTE(review): presumably the payload of SPUDRAWQUEUE_SETSAMPLERSTATEPART1_METHOD - confirm at the call sites.
struct SpuSetSamplerStatePart1_t
{
D3DSamplerDescPart1 m_desc;
CPs3gcmTextureData_t m_tex;
};
#endif