This commit is contained in:
nephacks
2025-06-04 03:22:50 +02:00
parent f234f23848
commit f12416cffd
14243 changed files with 6446499 additions and 26 deletions

View File

@@ -0,0 +1,76 @@
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
// SPU Profiling
//
// Emits SN Tuner "bookmarks" from SPU code so that profile ranges and single
// markers show up on the Tuner timeline.
//==================================================================================================
#ifndef INCLUDED_CELLMGR_SPU_PROFILE_H
#define INCLUDED_CELLMGR_SPU_PROFILE_H
//--------------------------------------------------------------------------------------------------
// Headers
//--------------------------------------------------------------------------------------------------
#include <stdint.h>
//--------------------------------------------------------------------------------------------------
// Defines
//--------------------------------------------------------------------------------------------------
// Uncomment to enable profiling
//#define ENABLE_SPU_PROFILE
//--------------------------------------------------------------------------------------------------
// Constants
//--------------------------------------------------------------------------------------------------
// Number of bookmarks emitted per start/stop event (see raw_spu_prof_start/stop)
const int NUM_BOOKMARKS_IN_EVENT = 6;
//--------------------------------------------------------------------------------------------------
// Functions
//--------------------------------------------------------------------------------------------------
/*
 * Insert a marker that is displayed in Tuner
 */
void insert_bookmark( uint32_t bookmark );
/*
 * 400 cycles delay per bookmark when emitting bookmarks on multiple SPUs
 */
void bookmark_delay( int NumBookmarks );
/*
 * Inserting 6 SPU bookmarks, which will
 * be identified by Tuner as a start event
 */
void raw_spu_prof_start( int iLevel, uint16_t lsa );
/*
 * Inserting 6 SPU bookmarks, which will
 * be identified by Tuner as a stop event
 */
void raw_spu_prof_stop( uint16_t lsa );
/*
 * Profiling macros.  They compile away to nothing unless ENABLE_SPU_PROFILE
 * is defined.  NOTE(review): END_BOOKMARK expands to nothing even when
 * profiling is enabled -- presumably bookmarks are point markers with no
 * explicit end; confirm against Tuner usage.
 */
#ifdef ENABLE_SPU_PROFILE
#define BEGIN_PROFILE(level) raw_spu_prof_start(level, 0)
#define END_PROFILE(level) raw_spu_prof_stop(level)
#define BEGIN_BOOKMARK(colour) insert_bookmark( colour )
#define END_BOOKMARK(colour)
#else
#define BEGIN_PROFILE(level)
#define END_PROFILE(level)
#define BEGIN_BOOKMARK(colour)
#define END_BOOKMARK(colour)
#endif
#endif // INCLUDED_CELLMGR_SPU_PROFILE_H

View File

@@ -0,0 +1,76 @@
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
//
//
//==================================================================================================
//--------------------------------------------------------------------------------------------------
// Headers
//--------------------------------------------------------------------------------------------------
#include "Profile.h"
#include <spu_intrinsics.h>
//--------------------------------------------------------------------------------------------------
// Functions
//--------------------------------------------------------------------------------------------------
/*
* Insert a marker that is displayed in Tuner
*/
void insert_bookmark( uint32_t bookmark )
{
	// Write the bookmark value to SPU channel 69 (the Tuner bookmark channel).
	// Only the low 16 bits are meaningful to Tuner -- callers pass 16-bit
	// payloads (see raw_spu_prof_start/stop).
	__asm__ volatile ("wrch $69, %0" :: "r" (bookmark));
	// Must wait for 16 cycles before the next channel write or the bookmark
	// may be lost; two 8-nop bundles provide the required spacing.
	__asm__ volatile ("nop;nop;nop;nop;nop;nop;nop;nop");
	__asm__ volatile ("nop;nop;nop;nop;nop;nop;nop;nop");
}
void bookmark_delay( int NumBookmarks )
{
	// Busy-wait roughly 400 cycles per emitted bookmark so that bookmark
	// streams from multiple SPUs do not collide in the trace.  Each loop
	// iteration issues 8 nops; do not restructure -- the delay is the point.
	// 400 cycles per bookmark when emitting bookmarks on both SPUs
	for ( int i=0; i<NumBookmarks*400/8; i++)
	{
		__asm__ volatile ("nop;nop;nop;nop;nop;nop;nop;nop");
	}
}
/*
 * Emit the 6-bookmark sequence that Tuner recognizes as a profile START
 * event: sentinel 0xffaa, 2x16-bit module name, level, LSA, sentinel 0xffab.
 */
void raw_spu_prof_start( int iLevel, uint16_t lsa )
{
	// Bookmark payloads are 16 bits wide, so the 4-character module name is
	// viewed as two uint16s through this union.
	typedef union { char c4[4]; uint16_t u16[2]; uint32_t u32; } Module_u;
	static Module_u s_mu = { { 't', 'e', 's', 't' } };	// hard-coded module name "test"
	insert_bookmark( 0xffaa );			// start marker 1
	insert_bookmark( s_mu.u16[0] );		// name
	insert_bookmark( s_mu.u16[1] );		// name
	insert_bookmark( iLevel );			// level
	insert_bookmark( lsa >> 2 );		// LSA is shifted by 2 as per the SPURS spec.
	insert_bookmark( 0xffab );			// start marker 2
	// Space out this event from bookmarks emitted by other SPUs.
	bookmark_delay( NUM_BOOKMARKS_IN_EVENT );
}
/*
 * Emit the 6-bookmark sequence that Tuner recognizes as a profile STOP
 * event: sentinel 0xffac, 4x16-bit GUID, sentinel 0xffad.
 */
void raw_spu_prof_stop( uint16_t lsa )
{
	typedef union { uint16_t u16[4]; uint64_t u64; } GUID_u;
	GUID_u guid;
	// Build a 64-bit GUID from the quadword at local store address
	// 0x80 + lsa: rotate each word left by 7 bits, then gather bytes
	// {0,1,4,5,8,9,12,13} (the top halfword of each rotated word).
	// NOTE(review): presumably this extracts an identifier embedded in the
	// SPU binary for Tuner to match against -- confirm with the Tuner docs.
	qword insn = si_roti(*(qword*)(0x80 + lsa), 7);
	qword pattern = (qword)(vec_uchar16){0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13};
	guid.u64 = si_to_ullong(si_shufb(insn, insn, pattern));
	insert_bookmark( 0xffac );			// stop marker 1
	insert_bookmark( guid.u16[0] );		// guid
	insert_bookmark( guid.u16[1] );		// guid
	insert_bookmark( guid.u16[2] );		// guid
	insert_bookmark( guid.u16[3] );		// guid
	insert_bookmark( 0xffad );			// stop marker 2
	bookmark_delay( NUM_BOOKMARKS_IN_EVENT );
}

View File

@@ -0,0 +1,98 @@
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
//
//
//==================================================================================================
#ifndef INCLUDED_SPUMGR_DMA_H
#define INCLUDED_SPUMGR_DMA_H
//--------------------------------------------------------------------------------------------------
// Headers
//--------------------------------------------------------------------------------------------------
#include <stdint.h>
#ifdef SPU
//#include <Stdshader_spu/Inc/debug_spu.h> // MH
#else
#include <debug/inc/debug.h>
#endif
//--------------------------------------------------------------------------------------------------
// Defines
//--------------------------------------------------------------------------------------------------
#define SPUMGR_IS_ALIGNED(val, align) (((val) & ((align) - 1)) == 0)
#define SPUMGR_ALIGN_UP(val, align) (((val) + ((align)-1)) & ~((align) - 1))
#define SPUMGR_ALIGN_DOWN(val, align) ((val) & ~((align) - 1))
#define SPUMGR_MSG_MEMCPY 0x000000ff
#define Assert(val) // MH
//--------------------------------------------------------------------------------------------------
// Types
//--------------------------------------------------------------------------------------------------
// Command block handed from the PPU to the SPU for an SPU-driven memcpy
// (consumed by SPU_memcpy; delivered by address via the inbound mailbox).
struct MemCpyHeader
{
	uint32_t src;			// main-memory source effective address
	uint32_t dst;			// main-memory destination effective address
	uint32_t size;			// number of bytes to copy
	uint32_t blocking;		// NOTE(review): presumably non-zero means the PPU waits for completion -- confirm with PPU-side caller
	uint8_t cacheLine[16];	// copy of the 16 bytes preceding dst's aligned start, used by SPU_memcpy to stitch unaligned head bytes
};
//--------------------------------------------------------------------------------------------------
// Classes
//--------------------------------------------------------------------------------------------------
// One element of an SPU DMA list (64 bits total).  Filled in by
// SetupDmaListEntry; each element moves at most 16K (size fits in 15 bits).
struct DMAList
{
	uint32_t stallAndNotify :1;		// stall-and-notify flag for this element
	uint32_t reserved :16;			// reserved bits
	uint32_t size :15;				// transfer size in bytes (<= 0x4000)
	uint32_t ea;					// effective address (low 32 bits) of the transfer
};
//--------------------------------------------------------------------------------------------------
// DmaCheckAlignment
// Checks restrictions specified in SpuMgr::DmaGet
//--------------------------------------------------------------------------------------------------
int DmaCheckAlignment(uint32_t src, uint32_t dest, uint32_t size);
//--------------------------------------------------------------------------------------------------
//SetupDmaListEntry
//
// Note that this function increments input ptr by number of entries added,
// which will be > 1 if size > 16K
//--------------------------------------------------------------------------------------------------
inline void SetupDmaListEntry(uint32_t stall, uint32_t ea, uint32_t size, DMAList **pDmaList)
{
// check alignment; don't pass in NULL for dest
if (!DmaCheckAlignment(ea, 0x10, size))
{
Assert(0);
}
Assert((size & 0xF) == 0); // for lists input sizes must be multiple of 16 bytes
while (size)
{
uint32_t dmaSize = 0x4000;
dmaSize = size < dmaSize? size: dmaSize;
(*pDmaList)->stallAndNotify = stall;
(*pDmaList)->size = dmaSize;
(*pDmaList)->ea = ea;
size -= dmaSize;
ea += dmaSize;
(*pDmaList)++;
}
}
#endif // INCLUDED_SPUMGR_DMA_H

View File

@@ -0,0 +1,628 @@
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
//
//
//==================================================================================================
//--------------------------------------------------------------------------------------------------
// Headers
//--------------------------------------------------------------------------------------------------
#include "sys/memory.h"
#include "sysutil/sysutil_sysparam.h"
#include "cell/sysmodule.h"
#include "tier0/platform.h"
#include "tier0/dbg.h"
#include "tier1/utlbuffer.h"
#include <sys/timer.h>
#include <sys/spu_image.h>
#include <stdio.h>
#include <stdlib.h>
#include <cell/cell_fs.h>
#include <cell/atomic.h>
#include <string.h>
#include "ps3_pathinfo.h"
#include <cell/spurs/control.h>
#include "SpuMgr_ppu.h"
#include "memdbgon.h"
typedef uint32_t uint32;
#define ASSERT Assert
//--------------------------------------------------------------------------------------------------
// Defines
//--------------------------------------------------------------------------------------------------
// Spu Mailbox Status Register
// Described in CBE architecture chapter 8.6.3 SPU Mailbox Status Register (SPU_Mbox_Stat)
#define SPU_IN_MBOX_COUNT_SHIFT (8)
#define SPU_IN_MBOX_COUNT (0xFF << SPU_IN_MBOX_COUNT_SHIFT)
#define SPU_OUT_MBOX_COUNT (0xFF)
#define SPU_OUT_INTR_MBOX_COUNT_SHIFT (16)
#define SPU_OUT_INTR_MBOX_COUNT (0xFF << SPU_OUT_INTR_MBOX_COUNT_SHIFT)
//--------------------------------------------------------------------------------------------------
// Globals
//--------------------------------------------------------------------------------------------------
// SPU manager instance
SpuMgr gSpuMgr;
//--------------------------------------------------------------------------------------------------
// DmaCheckAlignment
// Checks restrictions specified in SpuMgr::DmaGet
//--------------------------------------------------------------------------------------------------
int DmaCheckAlignment(uint32_t src, uint32_t dest, uint32_t size)
{
#if !defined( _CERT )
	// Validate a DMA request against the MFC rules:
	//   - sizes >= 16 must be multiples of 16, with both addresses
	//     16-byte aligned;
	//   - sizes 1/2/4/8 must be naturally aligned and have src and dest
	//     at the same offset within a quadword;
	//   - neither address may be NULL.
	// Returns non-zero when the transfer is acceptable.
	bool ok = (src != 0) && (dest != 0);
	uint32_t align;
	if ((size >= 16) && ((size & 0xf) == 0))
	{
		align = 16;
	}
	else if ((size == 8) || (size == 4) || (size == 2) || (size == 1))
	{
		align = size;
		ok = ok && ((src & 0xF) == (dest & 0xF));
	}
	else
	{
		return 0; // unsupported transfer size
	}
	ok = ok && ((src & (align - 1)) == 0) && ((dest & (align - 1)) == 0);
	return ok;
#else //!_CERT
	return 1; // checks compiled out in CERT builds
#endif //!_CERT
}
//--------------------------------------------------------------------------------------------------
// Internal functions
//--------------------------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------------------
// handle_syscall
//
// interrupt handler to handle SPU interrupts
// see Handle SPU Interrupts Lv2-Uders_manual_e P34
//--------------------------------------------------------------------------------------------------
void handle_syscall (uint64_t arg)
{
	// Class 2 interrupt handler for a raw SPU (arg is the SPU id).
	// Services Stop-and-Signal and Halt interrupts, restarting the SPU for
	// the user-defined stop code 0x3 and notifying the SN debugger hooks
	// otherwise.  See "Handle SPU Interrupts", Lv2 Users Manual, p34.
	sys_raw_spu_t id = arg;
	uint64_t stat;
	int ret;
#ifndef _CERT
	g_snRawSPULockHandler();
#endif
	// Read class 2 interrupt status; SPU halts and stops fall in this
	// category.
	ret = sys_raw_spu_get_int_stat(id, 2, &stat);
	if (ret)
	{
#ifndef _CERT
		g_snRawSPUUnlockHandler();
#endif
		// NOTE(review): sys_interrupt_thread_eoi() presumably does not
		// return (it ends interrupt processing), so the rest of the handler
		// is skipped when the status read fails -- confirm against Lv2 docs.
		sys_interrupt_thread_eoi();
	}
	//
	// SPU Stop-and-Signal Instruction Trap
	// This interrupt occurs when the SPU executes a stop-and-signal
	// instruction.
	//
	if (stat & INTR_STOP_MASK) //stop
	{
		// We've hit a stop; dispatch on the 16-bit stop code.
		uint32_t signalVal = GetStopSignal( id );
		switch ( signalVal )
		{
		case 0x3:
			// A stop placed in the SPU code to signal the PPU.  Do any
			// processing for the user-defined stop here.  If we do not
			// restart the SPU we must call g_snRawSPUNotifySPUStopped(id)
			// so the debugger knows the SPU has stopped.
			// Restart the SPU.
			sys_raw_spu_mmio_write( id, SPU_RunCntl, 0x1 );
			break;
		default:
#ifndef _CERT
			g_snRawSPUNotifySPUStopped(id);
#endif
			break;
		}
	}
	else if (stat & INTR_HALT_MASK) // halt
	{
#ifndef _CERT
		g_snRawSPUNotifySPUStopped(id);
#endif
	}
	// Other class 2 interrupts could be handled here
	// ...
	//
	// Must reset the interrupt status bits of those not handled.
	//
	ret = sys_raw_spu_set_int_stat(id, 2, stat);
	if (ret)
	{
#ifndef _CERT
		g_snRawSPUUnlockHandler();
#endif
		sys_interrupt_thread_eoi();
	}
	//
	// End of interrupt
	//
#ifndef _CERT
	g_snRawSPUUnlockHandler();
#endif
	sys_interrupt_thread_eoi();
}
int CreateDefaultInterruptHandler(SpuTaskHandle *pTask)
{
	// Wire up the default interrupt plumbing for one raw SPU: create an
	// interrupt PPU thread running handle_syscall, create a class 2
	// interrupt tag for the SPU, bind the two together, and unmask the
	// Halt and Stop-and-Signal interrupts.
	// Returns 0 on success, 1 if any step fails.
	if (sys_ppu_thread_create(&pTask->m_ppuThread, handle_syscall,
		0, INTR_HANDLER_THREAD_PRIORITY, INTR_HANDLER_THREAD_STACK_SIZE,
		SYS_PPU_THREAD_CREATE_INTERRUPT, "Interrupt PPU Thread"))
	{
		return 1;
	}
	// Interrupt tag for class 2 interrupts from this SPU.
	if (sys_raw_spu_create_interrupt_tag(pTask->m_spuId, 2, SYS_HW_THREAD_ANY, &pTask->m_intrTag))
	{
		return 1;
	}
	// Associate the tag with the handler thread.
	if (sys_interrupt_thread_establish(&pTask->m_interruptThread, pTask->m_intrTag,
		pTask->m_ppuThread, pTask->m_spuId))
	{
		return 1;
	}
	// Enable Halt and Stop-and-Signal interrupts.
	if (sys_raw_spu_set_int_mask(pTask->m_spuId, 2, INTR_STOP_MASK | INTR_HALT_MASK))
	{
		return 1;
	}
	return 0;
}
//--------------------------------------------------------------------------------------------------
// Class Methods
//--------------------------------------------------------------------------------------------------
int SpuMgr::Init(int numRawSpu)
{
	// Initialize the SPU manager: put numRawSpu SPUs into raw mode and
	// create them, leaving the remainder for SPURS.
	// Returns 0 on success, 1 on failure.
	//
	// Need at least 2 SPUs for SPURS instances
	ASSERT(numRawSpu < 5);
	// Run SPURS on all SPUs that are not in raw mode
	// Creating two SPURS instances. One with a thread group of 5 - numRawSpu threads and one
	// with a thread group of 1 thread.
	// The instance with a single thread is designed to be singled out as the preemption victim
	// when the OS needs to use an SPU. We ensure this by giving it a lower priority than the
	// dedicated SPURS instance.
	// Init dedicated SPUs SPURS instance
	// CellSpursAttribute attr;
	// int32 ret = cellSpursAttributeInitialize(&attr, 5 - numRawSpu, 99, 2, false);
	// ASSERT(ret == CELL_OK);
	// ret = cellSpursAttributeEnableSpuPrintfIfAvailable(&attr);
	// ASSERT(ret == CELL_OK);
	// ret = cellSpursAttributeSetNamePrefix(&attr, "gameSpusSpurs", std::strlen("gameSpusSpurs"));
	// ASSERT(ret == CELL_OK);
	// ret = cellSpursInitializeWithAttribute2(&m_exclusiveSpusSpurs, &attr);
	// ASSERT(ret == CELL_OK);
	// Init pre-emption SPU SPURS instance
	// ret = cellSpursAttributeInitialize(&attr, 1, 100, 2, false);
	// ASSERT(ret == CELL_OK);
	// ret = cellSpursAttributeEnableSpuPrintfIfAvailable(&attr);
	// ASSERT(ret == CELL_OK);
	// ret = cellSpursAttributeSetNamePrefix(&attr, "sharedSpuSpurs", std::strlen("sharedSpuSpurs"));
	// ASSERT(ret == CELL_OK);
	// ret = cellSpursInitializeWithAttribute2(&m_preemptedSpuSpurs, &attr);
	// ASSERT(ret == CELL_OK);
	int res = 0;
	// set up members
	m_numSpus = 0;
	// Initialize SPUs: 6 usable SPUs, numRawSpu of them reserved as raw
	if (sys_spu_initialize(6, numRawSpu) != SUCCEEDED)
	{
		res = 1;
		goto xit;
	}
	// Create raw spus and mark them all as free
	for (; m_numSpus < (uint32)numRawSpu; m_numSpus++)
	{
		if (sys_raw_spu_create(&m_spuIds[m_numSpus], NULL) != SUCCEEDED)
		{
			// BUGFIX: message previously read "saw spu"
			Error("Unable to create raw spu\n");
			res = 1;
			goto xit;
		}
#ifndef _CERT
		g_snRawSPUNotifyCreation(m_spuIds[m_numSpus]);
#endif
		m_spuInUse[m_numSpus] = 0;
	}
xit:
	return res;
}
void SpuMgr::Term()
{
	// Tear down the manager: destroy every raw SPU created by Init and
	// reset the pool counter.  (SPURS finalization -- cellSpursFinalize on
	// m_exclusiveSpusSpurs / m_preemptedSpuSpurs -- is currently disabled,
	// matching the disabled initialization in Init.)
	for (uint32 spu = 0; spu < m_numSpus; spu++)
	{
		sys_raw_spu_destroy(m_spuIds[spu]);
	}
	m_numSpus = 0;
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
uint32_t spumgr_mmio_read(uint32_t spu, uint32_t regoffset)
{
	// Read a raw SPU problem-state register via MMIO.  Replacement for
	// sys_raw_spu_mmio_read (see the header note about the SNC bug).
	uint64_t addr = get_reg_addr(spu,regoffset);
	// NOTE(review): truncates the register address to 32 bits -- assumes
	// the problem-state area is mapped below 4GB; confirm.
	addr &= 0xffffffffUL;
	volatile uint32_t * pAddr = (uint32_t*) addr;	// volatile: actual hardware register
	return *pAddr;
}
void spumgr_mmio_write(int spu, int regoffset, uint32_t value)
{
	// Write a raw SPU problem-state register via MMIO.  Replacement for
	// sys_raw_spu_mmio_write (see the header note about the SNC bug).
	uint64_t addr = get_reg_addr(spu,regoffset);
	// NOTE(review): truncates the register address to 32 bits -- assumes
	// the problem-state area is mapped below 4GB; confirm.
	addr &= 0xffffffffUL;
	volatile uint32_t * pAddr = (uint32_t*) addr;	// volatile: actual hardware register
	*pAddr = value;
}
//--------------------------------------------------------------------------------------------------
// Create Spu task from file based image
//--------------------------------------------------------------------------------------------------
static char modPath[MAX_PATH];
int SpuMgr::CreateSpuTask(const char *path, SpuTaskHandle *pTask,
CreateSPUTaskCallback *pfnCallback /* = NULL */)
{
int res = 0;
int ret;
uint32 spu;
register uint32 spuid;
uint32 entry;
FILE* fp;
void* pSpuProg = NULL;
sys_spu_image_t img;
pTask->m_spuId = -1;
pTask->m_ppuThread = NULL;
pTask->m_intrTag = NULL;
pTask->m_interruptThread = NULL;
// find free raw spu
for (spu = 0; spu < m_numSpus; spu++)
{
if (!m_spuInUse[spu])
{
break;
}
}
// check we found free spu
if (spu == m_numSpus)
{
res = 1;
goto xit;
}
// Loading an SPU program to the Raw SPU.
//if (sys_raw_spu_load(m_spuIds[spu], path, &entry) != SUCCEEDED)
sprintf(modPath, "%s/%s", g_pPS3PathInfo->PrxPath(), path);
path = modPath;
if(strstr(path,".self"))
{
ret = sys_spu_image_open(&img, path);
if(ret != CELL_OK)
{
// (Running on Main Thread)
Error("Failed to open SPU program: %s\n", path);
}
}
else
{
// Allocate mem for SPU prog
CellFsStat stat;
cellFsStat(path,&stat);
pSpuProg = memalign(4096,((uint32)stat.st_size + 0x7f)&0xffffff80);
fp = fopen(path, "rb");
fread(pSpuProg, 1, stat.st_size, fp );
fclose(fp);
ret = sys_spu_image_import(&img, pSpuProg, SYS_SPU_IMAGE_PROTECT);
if (ret != CELL_OK)
{
res = 1;
goto xit;
}
}
ret = sys_raw_spu_image_load(m_spuIds[spu], &img);
spuid = m_spuIds[spu];
if (ret == CELL_OK)
{
// successfully loaded - mark spu as used and fill in o/p
m_spuInUse[spu] = 1;
pTask->m_spuId = spuid;
}
else
{
res = 1;
goto xit;
}
//Free PPU resources used to load image
if(pSpuProg)
{
free(pSpuProg);
}
sys_spu_image_close(&img);
entry = sys_raw_spu_mmio_read((uint32_t)spuid, (uint32_t)SPU_NPC);
#ifndef _CERT
g_snRawSPUNotifyElfLoad(spuid, entry, path);
#endif
// call callback or create default interrupt handler
if (!pfnCallback)
{
res = CreateDefaultInterruptHandler(pTask);
}
else
{
res = pfnCallback(pTask);
}
if (res)
{
goto xit;
}
// Run the Raw SPU
#ifndef _CERT
g_snRawSPUNotifySPUStarted(m_spuIds[spu]);
#endif
sys_raw_spu_mmio_write(spuid, SPU_NPC, entry);
sys_raw_spu_mmio_write(spuid, SPU_RunCntl, 0x1);
__asm("eieio");
// Once the SPU has started, write a mailbox with the effective address of the
// SPU lock.
WriteMailbox( pTask, (uint32) &pTask->m_lock );
WriteMailbox( pTask, (uint32) &pTask->m_memcpyLock );
xit:
if(res)
{
// Error("Error: CreateSpuTask error attempting to load and run %s on SPU\n", path);
}
return res;
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
void SpuMgr::DestroySpuTask(SpuTaskHandle *pTask)
{
	// Stop a running SPU task and tear down its interrupt plumbing.
	// Safe to call on a task that never started (m_spuId == -1); the
	// interrupt handles are checked individually before teardown.
	if (pTask->m_spuId != -1)
	{
		// Stop the Raw spu
#ifndef _CERT
		g_snRawSPUNotifySPUStopped(pTask->m_spuId);
#endif
		sys_raw_spu_mmio_write(pTask->m_spuId, SPU_RunCntl, 0x0);
		__asm("eieio");	// make sure the stop reaches the hardware before teardown
		// Cleanup interrupt handling mechanism
		if (pTask->m_interruptThread)
		{
			sys_interrupt_thread_disestablish(pTask->m_interruptThread); // also kills the thread
		}
		if (pTask->m_intrTag)
		{
			sys_interrupt_tag_destroy(pTask->m_intrTag);
		}
	}
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
int SpuMgr::WriteMailbox(SpuTaskHandle *pTask, uint32 val, bool bBlocking /* =true */)
{
	// Push one 32-bit value into the SPU Inbound Mailbox (a 4-entry FIFO).
	// When bBlocking is true, spin until the FIFO has room; otherwise make
	// a single attempt.  Returns 0 if the value was written, non-zero if
	// the FIFO was full (non-blocking mode only).
	uint32 slotsFree;
	for (;;)
	{
		// Poll the SPU Mailbox Status Register for free inbound slots.
		slotsFree = sys_raw_spu_mmio_read(pTask->m_spuId, SPU_MBox_Status) & SPU_IN_MBOX_COUNT;
		if (slotsFree || !bBlocking)
		{
			break;
		}
	}
	if (slotsFree)
	{
		sys_raw_spu_mmio_write(pTask->m_spuId, SPU_In_MBox, (uint32_t)val);
	}
	return !slotsFree;
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
int SpuMgr::ReadMailbox(SpuTaskHandle *pTask, uint32 *pVal, bool bBlocking /* = true */)
{
	// Pull one 32-bit value from the SPU Outbound Mailbox into *pVal.
	// When bBlocking is true, spin until a message is available; otherwise
	// make a single attempt.  Returns 0 if a value was read, non-zero if
	// the mailbox was empty (non-blocking mode only).
	uint32 pending;
	do
	{
		// Poll the SPU Mailbox Status Register for a pending outbound message.
		pending = sys_raw_spu_mmio_read(pTask->m_spuId, SPU_MBox_Status) & SPU_OUT_MBOX_COUNT;
	} while (bBlocking && pending == 0);
	if (pending != 0)
	{
		// Read the SPU Outbound Mailbox Register.
		*pVal = sys_raw_spu_mmio_read(pTask->m_spuId, SPU_Out_MBox);
	}
	return pending == 0;
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
int SpuMgr::ReadIntrMailbox(SpuTaskHandle *pTask, uint32 *pVal, bool bBlocking /* = true */)
{
	// Pull one 32-bit value from the SPU Outbound Interrupt Mailbox into
	// *pVal.  When bBlocking is true, spin until a message is available;
	// otherwise make a single attempt.  Returns 0 if a value was read,
	// non-zero if the mailbox was empty (non-blocking mode only).
	uint32 pending;
	do
	{
		// Poll the SPU Mailbox Status Register for a pending interrupt-mailbox message.
		pending = sys_raw_spu_mmio_read(pTask->m_spuId, SPU_MBox_Status) & SPU_OUT_INTR_MBOX_COUNT;
	} while (bBlocking && pending == 0);
	if (pending != 0)
	{
		// Read via the privileged accessor for the interrupt mailbox.
		sys_raw_spu_read_puint_mb(pTask->m_spuId, pVal);
	}
	return pending == 0;
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
bool SpuMgr::Lock( SpuTaskHandle *pTask )
{
	// Try to acquire the task's lock word by atomically swapping 0 -> 1.
	// Returns true only if this call took the lock (previous value was 0).
	// Non-blocking: returns false immediately when the lock is already held.
	return cellAtomicCompareAndSwap32( &pTask->m_lock, 0, 1 ) == 0;
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
void SpuMgr::Unlock( SpuTaskHandle *pTask )
{
	// Release the task lock by atomically swapping 1 -> 0.
	// A no-op if the lock word is not currently 1.
	cellAtomicCompareAndSwap32( &pTask->m_lock, 1, 0 );
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
bool SpuMgr::MemcpyLock( SpuTaskHandle *pTask )
{
	// Try to acquire the task's memcpy lock word (0 -> 1, atomically).
	// Returns true only if this call took the lock.  Non-blocking.
	return cellAtomicCompareAndSwap32( &pTask->m_memcpyLock, 0, 1 ) == 0;
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
void SpuMgr::MemcpyUnlock( SpuTaskHandle *pTask )
{
	// Release the memcpy lock by atomically swapping 1 -> 0.
	// A no-op if the lock word is not currently 1.
	cellAtomicCompareAndSwap32( &pTask->m_memcpyLock, 1, 0 );
}

View File

@@ -0,0 +1,238 @@
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
// Raw SPU management
//
//==================================================================================================
#ifndef INCLUDED_SPUMGR_PPU_H
#define INCLUDED_SPUMGR_PPU_H
//--------------------------------------------------------------------------------------------------
// Headers
//--------------------------------------------------------------------------------------------------
#include <sys/spu_initialize.h>
#include <sys/raw_spu.h>
#include <sys/spu_utility.h>
#include <sys/ppu_thread.h>
#include <sys/interrupt.h>
#include <sys/raw_spu.h>
#include <sys/sys_time.h>
#include <cell/spurs.h>
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
extern "C"
{
extern void (*g_snRawSPULockHandler) (void);
extern void (*g_snRawSPUUnlockHandler) (void);
extern void (*g_snRawSPUNotifyCreation) (unsigned int uID);
extern void (*g_snRawSPUNotifyDestruction) (unsigned int uID);
extern void (*g_snRawSPUNotifyElfLoad) (unsigned int uID, unsigned int uEntry, const char *pFileName);
extern void (*g_snRawSPUNotifyElfLoadNoWait) (unsigned int uID, unsigned int uEntry, const char *pFileName);
extern void (*g_snRawSPUNotifyElfLoadAbs) (unsigned int uID, unsigned int uEntry, const char *pFileName);
extern void (*g_snRawSPUNotifyElfLoadAbsNoWait) (unsigned int uID, unsigned int uEntry, const char *pFileName);
extern void (*g_snRawSPUNotifySPUStopped) (unsigned int uID);
extern void (*g_snRawSPUNotifySPUStarted) (unsigned int uID);
};
//--------------------------------------------------------------------------------------------------
// Fwd refs
//--------------------------------------------------------------------------------------------------
class CellSpurs2;
class SpuTaskHandle;
//--------------------------------------------------------------------------------------------------
// Defines
//--------------------------------------------------------------------------------------------------
#define MAX_RAW_SPUS 5
// Class 2 Interrupt Status Register (INT_Stat_class2)
// Described in CBE architecture v10 on page 259
#define INTR_PPU_MB_SHIFT 0
#define INTR_STOP_SHIFT 1
#define INTR_HALT_SHIFT 2
#define INTR_DMA_SHIFT 3
#define INTR_SPU_MB_SHIFT 4
#define INTR_PPU_MB_MASK (0x1 << INTR_PPU_MB_SHIFT)
#define INTR_STOP_MASK (0x1 << INTR_STOP_SHIFT)
#define INTR_HALT_MASK (0x1 << INTR_HALT_SHIFT)
#define INTR_DMA_MASK (0x1 << INTR_DMA_SHIFT)
#define INTR_SPU_MB_MASK (0x1 << INTR_SPU_MB_SHIFT)
// thread priority for interrupt handler threads
#define INTR_HANDLER_THREAD_PRIORITY 200
#define INTR_HANDLER_THREAD_STACK_SIZE 0x4000
#define SPUMGR_IS_ALIGNED(val, align) (((val) & ((align) - 1)) == 0)
#define SPUMGR_ALIGN_UP(val, align) (((val) + ((align)-1)) & ~((align) - 1))
#define SPUMGR_ALIGN_DOWN(val, align) ((val) & ~((align) - 1))
//--------------------------------------------------------------------------------------------------
// Overide sys_raw_spu_mmio_read / write, since they draw out another bug in SNC :(
//--------------------------------------------------------------------------------------------------
#define sys_raw_spu_mmio_read(spu, regoffset) spumgr_mmio_read(spu, regoffset)
extern uint32_t spumgr_mmio_read(uint32_t spu, uint32_t regoffset);
#define sys_raw_spu_mmio_write(spu, regoffset, value) spumgr_mmio_write(spu, regoffset, value)
extern void spumgr_mmio_write(int id, int offset, uint32_t value);
//--------------------------------------------------------------------------------------------------
// Types
//--------------------------------------------------------------------------------------------------
typedef int CreateSPUTaskCallback(SpuTaskHandle *pTask);
// SpuStatusRegister
// Bit layout of the SPU_Status problem-state register.
// Described in CBE architecture v10 on page 87.
// Fields are declared from the stop-code halfword down to the run bit;
// GetStopSignal() uses m_sc to recover the stop-and-signal code.
typedef union SpuStatusRegister
{
	struct
	{
		uint32_t m_sc : 16;								// stop-and-signal code (immediate of the "stop" instruction)
		uint32_t m_reserved2 : 5;
		uint32_t m_isolateExitStatus : 1;
		uint32_t m_isolateLoadStatus : 1;
		uint32_t m_reserved1 : 1;
		uint32_t m_isolationStatus : 1;
		uint32_t m_illegalChannelInstructionDetected : 1;
		uint32_t m_invalidInstructionDetected : 1;
		uint32_t m_singleStepStatus : 1;
		uint32_t m_waitStatus : 1;
		uint32_t m_haltStatus : 1;
		uint32_t m_programStopAndSignalStatus : 1;
		uint32_t m_runStatus : 1;
	};
	uint32_t m_val;		// raw 32-bit register value (set this, then read fields)
} SpuStatusRegister;
//--------------------------------------------------------------------------------------------------
// Classes
//--------------------------------------------------------------------------------------------------
// Handle for one task running on a raw SPU, filled in by
// SpuMgr::CreateSpuTask and torn down by SpuMgr::DestroySpuTask.
class SpuTaskHandle
{
public:
	sys_raw_spu_t m_spuId;								// raw SPU id; -1 when no SPU is attached
	sys_ppu_thread_t m_ppuThread;						// PPU thread servicing this SPU's interrupts
	sys_interrupt_tag_t m_intrTag;						// class 2 interrupt tag for this SPU
	sys_interrupt_thread_handle_t m_interruptThread;	// binding of m_intrTag to m_ppuThread
	uint32_t m_lock;									// task lock word: 0 free, 1 held (see SpuMgr::Lock/Unlock)
	uint32_t m_memcpyLock;								// SPU memcpy lock word: 0 free, 1 held
};
//--------------------------------------------------------------------------------------------------
// SpuMgr
//
// Provides functionality for running raw spu tasks. For this purpose it creates
// and manages a raw spu pool
//
// Currently we assume a simple setup where app loads an elf on to a raw spu,
// after which the spu starts running the elf and continues to do so thereafter.
// The ppu->spu and spu->ppu communication is explicitly handled by the app
// and the spu program using SpuMgr methods
//
// Currently all DMA transfer is supposed to be initiated by the SPUs which is
// why SpuMgr does not provide any DMA functionality
//--------------------------------------------------------------------------------------------------
class SpuMgr
{
public:
	// Init/Term
	int Init(int numRawSpu);	// create numRawSpu raw SPUs; returns 0 on success
	void Term();				// destroy all raw SPUs created by Init
	// Create/Destroy tasks
	int CreateSpuTask(const char *path, SpuTaskHandle *pTask, CreateSPUTaskCallback *pfnCallback = NULL);
	void DestroySpuTask(SpuTaskHandle *pTask);
	//
	// Helper functions to communicate with the SPU
	// As we build more functionality into the SPU mgr it is
	// possible that we will need to expose less of
	// these low-level functions
	//
	//
	// Mailbox functions
	//
	//
	// The SPU Inbound Mailbox is a 4-level FIFO structure for communication from the
	// PPU to SPU, and can hold up to four 32-bit messages.
	// If there are already four messages in the mailbox the last message will be
	// overwritten...but we can check for a full mailbox and prevent this.
	// All three mailbox calls return 0 on success and non-zero when the
	// non-blocking attempt found no room / no message.
	int WriteMailbox(SpuTaskHandle *pTask, uint32_t val, bool bBlocking = true);
	// The SPU Outbound Mailbox can hold one 32-bit message for SPU-to-PPU communication.
	int ReadMailbox(SpuTaskHandle *pTask, uint32_t *pVal, bool bBlocking = true);
	// The SPU Outbound Interrupt Mailbox can hold one 32-bit message for SPU-to-PPU communication.
	int ReadIntrMailbox(SpuTaskHandle *pTask, uint32_t *pVal, bool bBlocking = true);
	//
	// Access to local store - note that this involves MMIO which will be slow
	// so need to use DMA instead for any significant data transfer. This
	// mechanism may be useful for writing some small amount of data such
	// as some constants etc into LS
	//
	int WriteLS(SpuTaskHandle *pTask, uint32_t lsOffset, void *pData, uint32_t size);
	int ReadLS(SpuTaskHandle *pTask, uint32_t lsOffset, void *pData, uint32_t size);
	// Non-blocking per-task locks (see SpuTaskHandle::m_lock / m_memcpyLock).
	bool Lock( SpuTaskHandle *pTask );
	void Unlock( SpuTaskHandle *pTask );
	bool MemcpyLock( SpuTaskHandle *pTask );
	void MemcpyUnlock( SpuTaskHandle *pTask );
	// CellSpurs2 m_exclusiveSpusSpurs; // SPURS instance running on SPUs used exclusively by the application
	// CellSpurs2 m_preemptedSpuSpurs; // SPURS instance running on an SPU shared with the OS (may be preempted by it occasionally)
private:
	uint32_t m_numSpus;						// number of raw SPUs created
	uint32_t m_spuInUse[MAX_RAW_SPUS];		// per-slot busy flags (1 = task loaded)
	sys_raw_spu_t m_spuIds[MAX_RAW_SPUS];	// OS ids of the raw SPUs
	// NOTE(review): declared but no definition visible in this file.
	int ReadMailboxChannel(SpuTaskHandle *pTask, uint32_t *pVal,
		uint32_t countMask, uint32_t channel, bool bBlocking = true);
};
//--------------------------------------------------------------------------------------------------
// Externs
//--------------------------------------------------------------------------------------------------
extern SpuMgr gSpuMgr;
//--------------------------------------------------------------------------------------------------
// DmaCheckAlignment
// Checks restrictions specified in SpuMgr::DmaGet
//--------------------------------------------------------------------------------------------------
int DmaCheckAlignment(uint32_t src, uint32_t dest, uint32_t size);
//--------------------------------------------------------------------------------------------------
// GetStopSignal
//--------------------------------------------------------------------------------------------------
inline uint32_t GetStopSignal( sys_raw_spu_t idSpu )
{
	// Read the SPU_Status register and extract the 16-bit stop-and-signal
	// code (the immediate operand of the "stop" instruction the SPU hit).
	SpuStatusRegister status;
	status.m_val = sys_raw_spu_mmio_read(idSpu, SPU_Status);
	return status.m_sc;
}
#endif // INCLUDED_SPUMGR_PPU_H

View File

@@ -0,0 +1,485 @@
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
//
//
//==================================================================================================
//--------------------------------------------------------------------------------------------------
// Headers
//--------------------------------------------------------------------------------------------------
#include "SpuMgr_spu.h"
#include <cell/atomic.h>
#ifndef _CERT
#include <libsn_spu.h>
#endif
#include <stdlib.h>
#include <string.h>
//--------------------------------------------------------------------------------------------------
// Globals
//--------------------------------------------------------------------------------------------------
// singleton instance
SpuMgr gSpuMgr __attribute__((aligned(128)));
// 16B staging area used by SPU_memcpy to carry destination bytes across chunk
// boundaries when the destination is not 16B aligned
unsigned char gUnalignedMem[16] __attribute__((aligned(16)));
// header describing the current PPU-requested memcpy (fetched via DMA)
MemCpyHeader gMemCpyHeader __attribute__((aligned(16)));
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// Services one memcpy request from the PPU: the mailbox delivers the EA of a
// MemCpyHeader, which is DMA'd into gMemCpyHeader; the copy is then streamed
// src -> dst through the two LS staging buffers pBuf1/pBuf2 in 8KB chunks.
// An unaligned destination is handled by read-modify-write: bytes preceding
// dst within its 16B block are preserved in gUnalignedMem and merged in.
void SPU_memcpy( void *pBuf1, void *pBuf2 )
{
uint32_t header;
// EA of the MemCpyHeader for this request (blocking mailbox read).
gSpuMgr.ReadMailbox( &header );
// NOTE(review): MemcpyLock() is a try-lock (CAS) and its result is ignored
// here — verify callers guarantee exclusive use, or exclusion is not provided.
gSpuMgr.MemcpyLock();
gSpuMgr.DmaGetUNSAFE( &gMemCpyHeader, header, sizeof( MemCpyHeader ), 0 );
gSpuMgr.DmaDone( 0x1 );
// Source must be 16B aligned; the destination may be unaligned.
DEBUG_ERROR( ( gMemCpyHeader.src & 0xf ) == 0 );
uint32_t sizeAligned;
uint32_t sizeAlignedDown;
uint32_t dstAlignedDown;
uint32_t offset;
// cacheLine carries the destination bytes preceding dst in its 16B block.
memcpy( gUnalignedMem, gMemCpyHeader.cacheLine, 16 );
// Full 8KB chunks while more than one chunk remains.
while ( gMemCpyHeader.size > 8192 )
{
sizeAligned = 8192;
dstAlignedDown = SPUMGR_ALIGN_DOWN( gMemCpyHeader.dst, 16 );
offset = gMemCpyHeader.dst - dstAlignedDown;
gSpuMgr.DmaGetUNSAFE( pBuf1, gMemCpyHeader.src, sizeAligned, 0 );
gSpuMgr.DmaDone( 0x1 );
if ( offset )
{
// Re-insert the preserved leading destination bytes before the payload.
memcpy( pBuf2, gUnalignedMem, offset );
}
memcpy( (void *) ( (uint32_t) pBuf2 + offset ), pBuf1, sizeAligned );
// Ensure the LS stores are visible to the MFC before the PUT.
gSpuMgr.DmaSync();
gSpuMgr.DmaPut( dstAlignedDown, pBuf2, SPUMGR_ALIGN_UP( sizeAligned + offset, 16 ), 0 );
gSpuMgr.DmaDone( 0x1 );
// Save the tail bytes of this chunk for the next iteration's leading edge.
sizeAlignedDown = SPUMGR_ALIGN_DOWN( sizeAligned + offset, 16 );
memcpy( gUnalignedMem, (void *) ( (uint32_t) pBuf2 + sizeAlignedDown ), 16 );
gMemCpyHeader.size -= sizeAligned;
gMemCpyHeader.dst += 8192;
gMemCpyHeader.src += 8192;
}
// Final (<= 8KB) chunk: same read-merge-write sequence as above.
sizeAligned = SPUMGR_ALIGN_UP( gMemCpyHeader.size, 16 );
dstAlignedDown = SPUMGR_ALIGN_DOWN( gMemCpyHeader.dst, 16 );
offset = gMemCpyHeader.dst - dstAlignedDown;
gSpuMgr.DmaGetUNSAFE( pBuf1, gMemCpyHeader.src, sizeAligned, 0 );
gSpuMgr.DmaDone( 0x1 );
if ( offset )
{
memcpy( pBuf2, gUnalignedMem, offset );
}
memcpy( (void *) ( (uint32_t) pBuf2 + offset ), pBuf1, gMemCpyHeader.size );
sizeAligned = SPUMGR_ALIGN_UP( gMemCpyHeader.size + offset, 16 );
gSpuMgr.DmaSync();
gSpuMgr.DmaPut( dstAlignedDown, pBuf2, sizeAligned, 0 );
gSpuMgr.DmaDone( 0x1 );
// Signal completion back to the PPU when the caller is waiting on it.
if ( gMemCpyHeader.blocking )
{
gSpuMgr.WriteMailbox( 0 );
}
gSpuMgr.MemcpyUnlock();
}
//--------------------------------------------------------------------------------------------------
// DmaCheckAlignment
//
// Checks restrictions specified in SpuMgr::DmaGet
//--------------------------------------------------------------------------------------------------
// Validates the MFC DMA restrictions (see SpuMgr::DmaGet comment): sizes of
// 1/2/4/8 bytes must have matching low 4 address bits at both ends; sizes of
// 16+ must be a multiple of 16 and 16B-aligned. Returns non-zero when legal.
// Compiled out (always returns 1) in _CERT builds.
int DmaCheckAlignment(uint32_t src, uint32_t dest, uint32_t size)
{
#if !defined( _CERT )
uint32_t align = size;
bool error = false;
if (size >= 16 && ((size & 0xf) == 0))
{
// Multiples of 16B only need 16B alignment.
align = 16;
}
else if (size == 8 || size == 4 || size == 2 || size == 1)
{
// Naturally aligned small transfer: low 4 bits of LS and EA must match.
error = ((src & 0xF) != (dest & 0xF));
}
else
{
error = true; // bad size
}
// Also reject null addresses and misalignment relative to 'align'.
return (!error && src && dest &&
SPUMGR_IS_ALIGNED(src, align) &&
SPUMGR_IS_ALIGNED(dest, align));
#else // _CERT
return 1;
#endif // _CERT
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// One-time SPU-side setup: restarts the decrementer (it may not be running by
// default), resets the transfer/allocation statistics, and receives the
// effective addresses of the two PPU-side lock words via the mailbox.
// Always returns 0.
int SpuMgr::Init()
{
// Start the decrementer since it is possible
// that it has not been started by default
const unsigned int kEventDec = 0x20;
// Disable the decrementer event.
unsigned int maskEvents = spu_readch(SPU_RdEventStatMask);
spu_writech(SPU_WrEventMask, maskEvents & ~kEventDec);
// Acknowledge any pending events and stop the decrementer.
spu_writech(SPU_WrEventAck, kEventDec);
// Write the decrementer value to start the decrementer.
unsigned int decValue = spu_readch(SPU_RdDec);
spu_writech(SPU_WrDec, decValue);
// Enable events.
spu_writech(SPU_WrEventMask, maskEvents | kEventDec);
// Reset byte count
ResetBytesTransferred();
// reset malloc count
m_mallocCount = 0;
// Read the effective address of the SPU locks.
// NOTE(review): mailbox order must match the PPU-side writes — lock EA
// first, then memcpy-lock EA.
ReadMailbox( &m_lockEA );
ReadMailbox( &m_memcpyLockEA );
return 0;
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// Shutdown counterpart to Init(); nothing to release currently.
void SpuMgr::Term()
{
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaGet
//
// DmaGet - alignment and size checking
// DmaGetUNSAFE - no alignment or size checking (but will assert in debug)
// _DmaGet - handles badly aligned dma's, should be a private member really (doesn't handle small dma's)
//
// DMA restrictions
// An MFC supports naturally aligned DMA transfer sizes of 1, 2, 4,
// 8, and 16 bytes and multiples of 16 bytes
// Furthermore, if size is 1, 2, 4, or 8 bytes then lower 4 bits
// of LS and EA must match
//
// Note:
// Peak performance is achieved for transfers in which both the EA and
// the LSA are 128-byte aligned and the size of the transfer is a multiple
// of 128 bytes.
//--------------------------------------------------------------------------------------------------
// Starts a GET (main mem -> LS) without alignment fix-ups. The caller must
// satisfy the MFC restrictions documented above (asserted in debug builds).
// The transfer is asynchronous; wait on it with DmaDone(1 << tagId).
void SpuMgr::DmaGetUNSAFE(void *ls, uint32_t ea, uint32_t size, uint32_t tagId)
{
	DEBUG_ERROR( ea < 0xd0000000 );
	DEBUG_ERROR( ea );
	DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size));

	// Account for the transfer before 'size' is consumed below.
	// BUGFIX: the original updated these counters after the loop, when
	// 'size' had already been decremented to 0, so nothing was ever added.
	m_bytesRequested += size;
	m_bytesTransferred += size;

	// Issue the transfer in chunks of at most 16KB (MFC max transfer size).
	while (size)
	{
		uint32_t dmaSize = 0x4000;
		dmaSize = (size < dmaSize)? size: dmaSize;
		size -= dmaSize;
		// kick off dma
		spu_mfcdma64( (void*)ls, 0, ea, dmaSize, tagId, MFC_GET_CMD);
		m_numDMATransfers++;
		ls = (void*)((uint32_t)ls + dmaSize);
		ea += dmaSize;
	}
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::_DmaGet
//
// Internal function - do not call this directly
//--------------------------------------------------------------------------------------------------
// Handles GETs whose EA, LS address or size violate the MFC alignment rules:
// rounds the region out to 16B boundaries, reads into a 16B-aligned temp
// buffer, then memcpy's just the requested bytes back to 'ls'.
// NOTE(review): when a temp buffer is used this blocks on DmaDone internally,
// so the caller-supplied tagId does not give async behavior in that case.
void SpuMgr::_DmaGet(void *ls, uint32_t ea, uint32_t size, uint32_t tagId)
{
uint32_t unaligned = false;
uint32_t eaAligned = (uint32_t)ea;
uint32_t sizeAligned = size;
uint32_t lsAligned = (uint32_t)ls;
uint32_t sizeOffset = 0;
char *pTempBuff = NULL;
// check if src is unaligned
if (eaAligned & 0xF)
{
eaAligned = eaAligned & ~0xF; // round down
sizeOffset = ea - eaAligned;   // bytes of padding before the requested data
sizeAligned += sizeOffset;
unaligned = true;
}
// check if size is unaligned
if (sizeAligned & 0xF)
{
sizeAligned = (sizeAligned + 0xF) & ~0xF; // round up
unaligned = true;
}
// if we have adjusted the size, or if ls is unaligned,
// we need to alloc temp buffer
if (unaligned || (lsAligned & 0xF))
{
pTempBuff = (char*)MemAlign(0x10, sizeAligned);
lsAligned = (uint32_t)pTempBuff;
unaligned = true;
}
// add up bytes transferred, for informational purposes
m_bytesRequested += size;
m_bytesTransferred += sizeAligned;
// do the dma, in chunks of at most 16KB (MFC max transfer size)
while (sizeAligned)
{
uint32_t dmaSize = 0x4000;
dmaSize = (sizeAligned < dmaSize)? sizeAligned: dmaSize;
sizeAligned -= dmaSize;
// kick off dma
spu_mfcdma64( (void*)lsAligned, 0, eaAligned, dmaSize, tagId, MFC_GET_CMD);
m_numDMATransfers++;
lsAligned += dmaSize;
eaAligned += dmaSize;
}
if (unaligned)
{
// block for now till dma done because we do the memcpy right here
DmaDone(1 << tagId);
// copy data over, skipping the leading padding bytes
memcpy(ls, pTempBuff + sizeOffset, size);
// free temp buff
Free(pTempBuff);
}
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaGetSAFE
//
// DMA restrictions (look at SpuMgr::DmaGetUNSAFE in this file) are
// handled transparently by this function
//--------------------------------------------------------------------------------------------------
// GET (main mem -> LS) that transparently handles the MFC alignment/size
// restrictions (see DmaGetUNSAFE): well-formed requests go straight to the
// hardware, everything else is routed through _DmaGet's temp-buffer path.
void SpuMgr::DmaGetSAFE(void *ls, uint32_t ea, uint32_t size, uint32_t tagId)
{
	DEBUG_ERROR( ea );

	if( size < 0x10 )
	{
		// lowest 4 bits of address have to match regardless, &
		// size can only be 1, 2, 4 or 8 B
		if( size==0x1 || size==0x2 || size==0x4 || size==0x8 )
		{
			// BUGFIX: was '(uint32_t)ls&0xF == ea&0xF', which parses as
			// '((uint32_t)ls & (0xF == ea)) & 0xF' because == binds tighter
			// than &, so the fast path was effectively never taken. The
			// intended comparison of the low 4 address bits is:
			if( ( (uint32_t)ls & 0xF ) == ( ea & 0xF ) )
			{
				DmaGetUNSAFE(ls,ea,size,tagId);
			}
			else
			{
				// small get not aligned within a 16B block
				_DmaGet(ls,ea,size,tagId);
			}
		}
		else
		{
			// if < 16B can only get 1,2,4 or 8B
			_DmaGet(ls,ea,size,tagId);
		}
	}
	else
	{
		if( (!(size & 0xF)) &&          // has to be multiple of 16B, &
			(((uint32_t)ls&0xF)==0) &&  // ea and ls have to be 16B aligned
			((ea&0xF)==0) )
		{
			// alignment is okay just dma
			DmaGetUNSAFE(ls,ea,size,tagId);
		}
		else
		{
			_DmaGet(ls,ea,size,tagId);
		}
	}
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaPut
//--------------------------------------------------------------------------------------------------
// Starts a PUT (LS -> main mem). The caller must satisfy the MFC alignment
// restrictions (asserted in debug builds); the transfer is asynchronous —
// wait on it with DmaDone(1 << tagId).
void SpuMgr::DmaPut(uint32_t ea, void *ls, uint32_t size, uint32_t tagId)
{
	DEBUG_ERROR( (ea!=0) && (ea<0xd0000000) ); // valid ea
	DEBUG_ERROR( (uint32_t)ls < 0x40000 ); // valid ls
	DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size));

	// Issue the PUT in chunks of at most 16KB (MFC max transfer size).
	uint32_t lsAddr = (uint32_t)ls;
	while (size)
	{
		uint32_t chunk = ( size > 0x4000 ) ? 0x4000 : size;
		// initiate dma to ppu
		spu_mfcdma64( (void*)lsAddr, 0, ea, chunk, tagId, MFC_PUT_CMD);
		lsAddr += chunk;
		ea += chunk;
		size -= chunk;
	}
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaSmallPut
//--------------------------------------------------------------------------------------------------
// PUT (LS -> main mem) for small/odd sizes: picks the largest naturally
// aligned element width (8/4/2/1 bytes) that evenly divides 'size' and issues
// one MFC PUT per element. Alignment is still asserted in debug builds.
void SpuMgr::DmaSmallPut(uint32_t ea, void *ls, uint32_t size, uint32_t tagId)
{
	DEBUG_ERROR( (ea!=0) && (ea<0xd0000000) ); // valid ea
	DEBUG_ERROR( (uint32_t)ls < 0x40000 ); // valid ls
	DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size));

	// Largest element width that divides the total size.
	uint32_t elemSize;
	if ((size & 7) == 0)
	{
		elemSize = 8;
	}
	else if ((size & 3) == 0)
	{
		elemSize = 4;
	}
	else if ((size & 1) == 0)
	{
		elemSize = 2;
	}
	else
	{
		elemSize = 1;
	}

	uint32_t lsAddr = (uint32_t)ls;
	while (size)
	{
		// initiate dma to ppu, one element at a time
		spu_mfcdma64( (void*)lsAddr, 0, ea, elemSize, tagId, MFC_PUT_CMD);
		lsAddr += elemSize;
		ea += elemSize;
		size -= elemSize;
	}
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaGetlist
//
// Gather data scattered around main mem, MFC will run through the list, and place the elements (based on ea address and size)
// contiguously in ls.
//
// NOTE: if an individual list element size is <16B, the data will still be dma'd but the proceeding element will be placed
// on the next 16B boundary. So it is possible to get lots of small elements, but you will be left with gaps in ls.
//
// ls - ls address of where items will be placed (contiguously)
// lsList - ls address of actual list
// sizeList - size of list in bytes (each list element is 8B (sizeof(DMAList)), so sizeList should be number of list elements // sizeof(DMAList))
// tagId - works the same way as regular DMA's
//
// Alignment and Size Restrictions:
// -ls and lsList must be 8B aligned
// -size must be a multiple of 8B (sizeof(DMAList))
// -no more than 2048 list elements
//
// light error checking right now
//--------------------------------------------------------------------------------------------------
// Issues a DMA list GET: gathers the (ea, size) elements of pLS_List
// contiguously into 'ls'. See the banner above for alignment/size rules.
// NOTE(review): the assert uses '<', so exactly 2048 elements trips it even
// though the banner says "no more than 2048" — confirm intended limit.
void SpuMgr::DmaGetList(void *ls, DMAList *pLS_List, uint32_t sizeList, uint32_t tagId)
{
DEBUG_ERROR( ((uint32_t)pLS_List&0x7) == 0 ); // ls address must be 8B aligned
DEBUG_ERROR( ((uint32_t)ls&0x7) == 0 ); // ea so aligned also, due to offset within 16B alignment restrictions
DEBUG_ERROR( (sizeList&0x7) == 0 ); // list size is a multiple of 8B
DEBUG_ERROR( sizeList<(2048*sizeof(DMAList))); // no more than 2048 list elements
// initiate dma list
spu_mfcdma64( ls, 0, (uint32_t)pLS_List, sizeList, tagId, MFC_GETL_CMD );
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaGPutlist
//
// Scatter data held contiguously in ls, to main mem
//
// ls - ls address of where items exist (contiguously) to be scattered back to main mem
// lsList - ls address of actual list
// sizeList - size of list in bytes (each list element is 8B (sizeof(DMAList)), so sizeList should be number of list elements * sizeof(DMAList))
// tagId - works the same way as regular DMA's
//
// Alignment and Size Restrictions:
// ls and lsList must be 8B aligned, size must be a multiple of 8B (sizeof(DMAList))
//
// light error checking right now
//--------------------------------------------------------------------------------------------------
// Issues a DMA list PUT: scatters contiguous data at 'ls' to the (ea, size)
// elements of pLS_List. See the banner above for alignment/size rules.
// NOTE(review): '<' rejects exactly 2048 elements — confirm intended limit.
void SpuMgr::DmaPutList(void *ls, DMAList* pLS_List, uint32_t sizeList, uint32_t tagId)
{
DEBUG_ERROR( ((uint32_t)pLS_List&0x7) == 0 ); // ls address must be 8B aligned
DEBUG_ERROR( ((uint32_t)ls&0x7) == 0 ); // ea so aligned also, due to offset within 16B alignment restrictions
DEBUG_ERROR( (sizeList&0x7) == 0 ); // list size is a multiple of 8B
DEBUG_ERROR( sizeList<(2048*sizeof(DMAList))); // no more than 2048 list elements
// initiate dma list
spu_mfcdma64( ls, 0, (uint32_t)pLS_List, sizeList, tagId, MFC_PUTL_CMD );
}

View File

@@ -0,0 +1,473 @@
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
//
//
//==================================================================================================
#ifndef INCLUDED_SPUMGR_SPU_H
#define INCLUDED_SPUMGR_SPU_H
//--------------------------------------------------------------------------------------------------
// Headers
//--------------------------------------------------------------------------------------------------
#include <stdint.h>
#include <string.h>
#include <spu_intrinsics.h>
#include <spu_mfcio.h>
#include <stdlib.h>
#include <cell/atomic.h>
#include "SpuMgr_dma.h"
#include <libsn_spu.h>
//--------------------------------------------------------------------------------------------------
// Defines
//--------------------------------------------------------------------------------------------------
#define DEBUG_ASSERT(val) Assert(val)
#define DEBUG_ERROR(val) Assert(val)
#define Msg(...)
#define Error(...)
#define DebuggerBreak() snPause()
#include <sys/integertypes.h>
//Short aliases
typedef int8_t s8;
typedef uint8_t u8;
typedef int16_t s16;
typedef uint16_t u16;
typedef int32_t s32;
typedef uint32_t u32;
typedef uint32_t u64[2];
typedef float f32;
typedef double f64;
typedef int BOOL;
typedef s8 int8;
typedef u8 uint8;
typedef s16 int16;
typedef u16 uint16;
typedef s32 int32;
typedef u32 uint32;
typedef u64 uint64;
typedef unsigned int uintp;
typedef unsigned int uint;
typedef vector float fltx4 ;
#define INT_MAX 0x7fffffff
#define DECL_ALIGN(x) __attribute__( ( aligned( x ) ) )
#define ALIGN16 DECL_ALIGN(16)
#define ALIGN16_POST
#define ALIGN128 DECL_ALIGN(128)
#define ALIGN128_POST
// Rounds val up to the next multiple of alignment.
// alignment must be a power of two; val is returned unchanged when already
// aligned.
template <typename T>
inline T AlignValue( T val, uintp alignment )
{
	uintp mask = alignment - 1;
	return ( T )( ( ( uintp )val + mask ) & ~mask );
}
#define ALIGN_VALUE( val, alignment ) ( ( val + alignment - 1 ) & ~( alignment - 1 ) )
// True when at most one bit of x is set.
// Note: deliberately returns true for x == 0 (matches existing callers).
inline bool IsPowerOfTwo( uint x )
{
	return 0 == ( x & ( x - 1 ) );
}
#define FORCEINLINE inline /* __attribute__ ((always_inline)) */
#define IsPlatformPS3() 1
#define IsPlatformPS3_PPU() 0
#define IsPlatformPS3_SPU() 1
#define IsPlatformX360() 0
#define IsPlatformOSX() 0
#define RESTRICT
#define V_memset __builtin_memset
#define V_memcpy memcpy
void SPU_memcpy( void *pBuf1, void *pBuf2 );
#define MemAlloc_AllocAligned(size, align) gSpuMgr.MemAlign(align, size)
#define ARRAYSIZE(p) (sizeof(p)/sizeof(p[0]))
#define MIN( a, b ) ( ( ( a ) < ( b ) ) ? ( a ) : ( b ) )
#define MAX( a, b ) ( ( ( a ) > ( b ) ) ? ( a ) : ( b ) )
//--------------------------------------------------------------------------------------------------
// Task handle
//--------------------------------------------------------------------------------------------------
// Per-SPU bookkeeping shared between the PPU-side and SPU-side managers.
class SpuTaskHandle
{
public:
uint32_t m_spuId;            // raw SPU id this task runs on
uint64_t m_ppuThread;        // presumably the owning PPU thread id — confirm against PPU-side code
uint32_t m_intrTag;          // interrupt tag for the SPU's interrupt mailbox — TODO confirm
uint32_t m_interruptThread;  // handler thread for SPU interrupts — TODO confirm
uint32_t m_lock;             // lock word (see SpuMgr::Lock/Unlock)
uint32_t m_memcpyLock;       // lock word guarding SPU_memcpy (see MemcpyLock/MemcpyUnlock)
};
//--------------------------------------------------------------------------------------------------
// SpuMgr
//--------------------------------------------------------------------------------------------------
// SPU-side manager singleton (gSpuMgr): wraps MFC DMA, mailboxes, atomic
// cache-line access, the decrementer, and a simple counted heap.
class SpuMgr
{
public:
// Init/Term
int Init();
void Term();
// MFC Atomic Update functionality
// Currently provides functionality to read/write up to
// one cache line (128 bytes) of main mem
inline void MFCAGet(void *ls, uint32_t ea, uint32_t size);
inline void MFCAPut(void *ls, uint32_t ea, uint32_t size);
//
// DMA functionality
//
// tagId is a value between 0 and 31 that can be used to group
// dma requests together
void DmaGetSAFE(void *ls, uint32_t ea, uint32_t size, uint32_t tagId);
void DmaGetUNSAFE(void *ls, uint32_t ea, uint32_t size, uint32_t tagId);
void DmaPut(uint32_t ea, void *ls, uint32_t size, uint32_t tagId);
void DmaSmallPut(uint32_t ea, void *ls, uint32_t size, uint32_t tagId);
void DmaGetList(void *ls, DMAList *pLS_List, uint32_t sizeList, uint32_t tagId);
void DmaPutList(void *ls, DMAList* pLS_List, uint32_t sizeList, uint32_t tagId);
// Returns 0 once all transfers in dmaTagMask are complete (blocks by default).
inline int DmaDone(uint32_t dmaTagMask, bool bBlocking = true);
// DmaSync
// All earlier store instructions are forced to complete
// before proceeding. This function ensures that all stores to
// to local storage are visible to the MFC or PPU.
inline void DmaSync()
{
__asm("dsync");
}
//
// Mailbox functions - see SpuMgr_ppu.h for a descrition of mailboxes
//
int WriteMailbox(uint32_t val, bool bBlocking = true);
int WriteIntrMailbox(uint32_t val, bool bBlocking = true);
int WriteMailboxChannel(uint32_t val, uint32_t channel, bool bBlocking /* = true */);
int ReadMailbox(uint32_t *pVal, bool bBlocking = true);
// Try-locks on PPU-shared lock words (CAS based; see inline defs below).
bool Lock();
void Unlock();
bool MemcpyLock();
void MemcpyUnlock();
// Decrementer access, for time stamps
inline uint32_t ReadDecr(void);
// mem mgr - thin counted wrappers over the LS heap
void *Malloc( uint32_t size )
{
m_mallocCount++;
void *ptr = malloc( size );
DEBUG_ASSERT( ptr );
return ptr;
}
// Aligned alloc that may return NULL (no assert).
void *MemAlignUNSAFE(uint32_t boundary, uint32_t size )
{
m_mallocCount++;
void *ptr = memalign( boundary, size );
return ptr;
}
// Aligned alloc that asserts on failure.
void *MemAlign( uint32_t boundary, uint32_t size )
{
void *ptr = MemAlignUNSAFE(boundary, size);
DEBUG_ERROR( ptr );
return ptr;
}
void Free( void *pData )
{
m_mallocCount--;
free( pData );
}
// Outstanding allocation count (Malloc/MemAlign minus Free).
uint32_t GetMallocCount()
{
return m_mallocCount;
}
// counters to help us keep track of how much data we are moving
inline void ResetBytesTransferred()
{
m_bytesRequested = 0;
m_bytesTransferred = 0;
m_numDMATransfers = 0;
}
// Private data and member functions
void _DmaGet(void *ls, uint32_t ea, uint32_t size, uint32_t tagId);
// 128B-aligned scratch cache lines used by the atomic (GETLLAR/PUTLLC) ops.
uint32_t m_lock[32] __attribute__ ((aligned(128)));
uint32_t m_lockEA;        // effective address of the PPU-shared lock word
uint32_t m_memcpyLock[32] __attribute__ ((aligned(128)));
uint32_t m_memcpyLockTest;// __attribute__ ((aligned(128)));
uint32_t m_memcpyLockEA;  // effective address of the memcpy lock word
uint32_t m_bytesRequested;   // bytes callers asked to move
uint32_t m_bytesTransferred; // bytes actually DMA'd (incl. alignment padding)
uint32_t m_numDMATransfers;  // number of MFC commands issued
uint32_t m_mallocCount;      // outstanding allocation count
uint8_t m_MFCACacheLine[128] __attribute__ ((aligned(128))); // MFCAGet/Put staging line
};
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// Atomically reads up to one 128B cache line from main memory: fetches the
// containing cache line with GETLLAR (blocking) and copies 'size' bytes at
// the requested offset into 'ls'. size + offset must fit in one line.
inline void SpuMgr::MFCAGet(void *ls, uint32_t ea, uint32_t size)
{
// get start of cache line
uint32_t eaAligned = SPUMGR_ALIGN_DOWN(ea, 0x80);
// get offset to given ea
uint32_t eaOffset = ea - eaAligned;
// check size to read
DEBUG_ASSERT(size + eaOffset <= 0x80);
// read cache line
spu_mfcdma64(&m_MFCACacheLine[0], 0, eaAligned, 128, 0, MFC_GETLLAR_CMD);
// wait for completion - this is a blocking read
spu_readch(MFC_RdAtomicStat);
// copy out data
memcpy(ls, &m_MFCACacheLine[eaOffset], size);
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// Atomically updates up to one 128B cache line in main memory using the
// load-with-reservation / store-conditional pair (GETLLAR/PUTLLC), retrying
// until the conditional store succeeds. size + offset must fit in one line.
inline void SpuMgr::MFCAPut(void *ls, uint32_t ea, uint32_t size)
{
// get start of cache line
uint32_t eaAligned = SPUMGR_ALIGN_DOWN(ea, 0x80);
// get offset to given ea
uint32_t eaOffset = ea - eaAligned;
// check size to write
DEBUG_ASSERT(size + eaOffset <= 0x80);
// atmoic update - read cache line and reserve it, update it,
// conditionally write it back until write succeeds
// if write succeeds then spu_readch(MFC_RdAtomicStat) returns 0
do
{
// read cache line
spu_mfcdma64(&m_MFCACacheLine[0], 0, eaAligned, 128, 0, MFC_GETLLAR_CMD);
// wait for completion - this is a blocking read
spu_readch(MFC_RdAtomicStat);
spu_dsync();
// update cache line
memcpy(&m_MFCACacheLine[eaOffset], ls, size);
// dsync to make sure it's commited to LS
spu_dsync();
// write it back
spu_mfcdma64(&m_MFCACacheLine[0], 0, eaAligned, 128, 0, MFC_PUTLLC_CMD);
} while (__builtin_expect(spu_readch(MFC_RdAtomicStat), 0));
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// Polls for completion of the MFC tag groups selected by dmaTagMask.
// Returns 0 when all selected groups are complete; when bBlocking is false
// a single status query is made and non-zero means "not done yet".
//
// Procedure per Cell Broadband Engine Architecture V1.0, ch. 9.3.1
// ("Procedures for Determining the Status of Tag Groups"):
//   1. Clear any pending tag status update request: write 0 to
//      MFC_WrTagUpdate, wait for its channel count to return to 1, then
//      read and discard the stale MFC_RdTagStat value.
//   2. Select the tag groups of interest via MFC_WrTagMask.
//   3. Request an immediate status update (write 0 to MFC_WrTagUpdate).
//   4. Read MFC_RdTagStat; the result is per-group completion status with
//      the mask applied.
//   5. Repeat steps 3-4 until the groups of interest are complete.
inline int SpuMgr::DmaDone(uint32_t dmaTagMask, bool bBlocking /*=true*/)
{
	// Step 1: clear pending tag status update requests.
	spu_writech(MFC_WrTagUpdate, 0);
	while (spu_readchcnt(MFC_WrTagUpdate) == 0)
	{
	}
	spu_readch(MFC_RdTagStat);

	// Step 2: select the tag groups of interest.
	spu_writech(MFC_WrTagMask, dmaTagMask);

	// Steps 3-5: query status, repeating while blocking and not yet done.
	uint32_t bDone = 0;
	do
	{
		spu_writech(MFC_WrTagUpdate, 0);
		uint32_t tagGroupStat = spu_readch(MFC_RdTagStat);
		bDone = (tagGroupStat == dmaTagMask);
	} while (bBlocking && !bDone);

	return !bDone;
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// Writes 'val' to the outbound (SPU -> PPU) mailbox. When bBlocking, spins
// until a slot is free; otherwise tries once. Returns 0 on success,
// non-zero when no slot was available.
inline int SpuMgr::WriteMailbox(uint32_t val, bool bBlocking /* = true */)
{
	uint32_t nFreeSlots;
	for (;;)
	{
		nFreeSlots = spu_readchcnt(SPU_WrOutMbox);
		if (nFreeSlots || !bBlocking)
			break;
	}
	if (nFreeSlots)
	{
		spu_writech(SPU_WrOutMbox, val);
	}
	return !nFreeSlots;
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// Writes 'val' to the outbound interrupt mailbox (raises a PPU interrupt).
// When bBlocking, spins until a slot is free; otherwise tries once.
// Returns 0 on success, non-zero when no slot was available.
inline int SpuMgr::WriteIntrMailbox(uint32_t val, bool bBlocking /* = true */)
{
	uint32_t nFreeSlots;
	for (;;)
	{
		nFreeSlots = spu_readchcnt(SPU_WrOutIntrMbox);
		if (nFreeSlots || !bBlocking)
			break;
	}
	if (nFreeSlots)
	{
		spu_writech(SPU_WrOutIntrMbox, val);
	}
	return !nFreeSlots;
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// Reads one value from the inbound (PPU -> SPU) mailbox into *pVal. When
// bBlocking, spins until mail arrives; otherwise tries once. Returns 0 on
// success, non-zero when nothing was available (*pVal untouched).
inline int SpuMgr::ReadMailbox(uint32_t *pVal, bool bBlocking /* = true */)
{
	uint32_t nPending;
	for (;;)
	{
		nPending = spu_readchcnt(SPU_RdInMbox);
		if (nPending || !bBlocking)
			break;
	}
	if (nPending)
	{
		*pVal = spu_readch(SPU_RdInMbox);
	}
	return !nPending;
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// Returns the current SPU decrementer value (counts down; used for timestamps).
inline uint32_t SpuMgr::ReadDecr(void)
{
return spu_readch(SPU_RdDec);
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// Try-lock on the PPU-shared lock word at m_lockEA: atomic CAS 0 -> 1.
// Returns true only when this caller acquired the lock (previous value was 0).
inline bool SpuMgr::Lock()
{
return cellAtomicCompareAndSwap32( m_lock, m_lockEA, 0, 1 ) == 0;
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// Releases the lock taken by Lock(): atomic CAS 1 -> 0 at m_lockEA.
// No-op if the word is not currently 1.
inline void SpuMgr::Unlock()
{
cellAtomicCompareAndSwap32( m_lock, m_lockEA, 1, 0 );
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// Try-lock on the memcpy lock word at m_memcpyLockEA: atomic CAS 0 -> 1.
// Returns true only when this caller acquired the lock.
inline bool SpuMgr::MemcpyLock()
{
return cellAtomicCompareAndSwap32( m_memcpyLock, m_memcpyLockEA, 0, 1 ) == 0;
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// Releases the memcpy lock: atomic CAS 1 -> 0 at m_memcpyLockEA.
inline void SpuMgr::MemcpyUnlock()
{
cellAtomicCompareAndSwap32( m_memcpyLock, m_memcpyLockEA, 1, 0 );
}
//--------------------------------------------------------------------------------------------------
// Externs
//--------------------------------------------------------------------------------------------------
extern SpuMgr gSpuMgr;
#endif // INCLUDED_SPUMGR_SPU_H

View File

@@ -0,0 +1,32 @@
//================ Copyright (c) 1996-2009 Valve Corporation. All Rights Reserved. =================
#include "cgutils.h"
#include "tier0/dbg.h"
// Maps a Cg datatype enum value to its parameter class.
struct DatatypeRec_t
{
CGtype type;
CGparameterclass parameterClass;
};
// Lookup table generated from Cg's master datatype list; entry order follows
// the CGtype enum, with index 0 corresponding to CG_TYPE_START_ENUM + 1.
static DatatypeRec_t s_datatypeClassname[] = {
#define CG_DATATYPE_MACRO(name, compiler_name, enum_name, base_enum, nrows, ncols,classname) \
{ enum_name, classname },
#include <Cg/cg_datatypes.h>
#undef CG_DATATYPE_MACRO
};
// Returns the parameter class for a Cg datatype, or
// CG_PARAMETERCLASS_UNKNOWN when 'type' falls outside the contiguous enum
// range covered by s_datatypeClassname.
CGparameterclass vcgGetTypeClass( CGtype type )
{
	const int nTableEntries = sizeof( s_datatypeClassname ) / sizeof( s_datatypeClassname[0] );
	if ( type > CG_TYPE_START_ENUM && type <= CG_TYPE_START_ENUM + nTableEntries )
	{
		// Table index 0 corresponds to CG_TYPE_START_ENUM + 1.
		DatatypeRec_t &rec = s_datatypeClassname[type - CG_TYPE_START_ENUM - 1];
		Assert( rec.type == type );
		return rec.parameterClass;
	}
	return CG_PARAMETERCLASS_UNKNOWN;
}

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,565 @@
//================ Copyright (c) 1996-2009 Valve Corporation. All Rights Reserved. =================
//
// LibGcm implementation of DX
//
//==================================================================================================
#ifndef DXABSTRACT_H
#define DXABSTRACT_H
#include "tier0/platform.h"
#include "tier0/memalloc.h"
#include "utlvector.h"
#include <cell/gcm.h>
#include <cell/gcm/gcm_method_data.h>
#include <cell/gcm/gcm_methods.h>
#include <sysutil/sysutil_sysparam.h>
#include "gcmconfig.h"
#include "dxabstract_def.h"
#include "gcmtexture.h"
#include "gcmlabels.h"
#define GCM_ALLOW_TIMESTAMPS 1
#ifdef _CERT
#define Debugger() ((void)0)
#else
#define Debugger() DebuggerBreak()
#endif
#define PS3GCM_ARTIFICIAL_TEXTURE_HANDLE_INDEX_BACKBUFFER 0
#define PS3GCM_ARTIFICIAL_TEXTURE_HANDLE_INDEX_DEPTHBUFFER 1
//--------------------------------------------------------------------------------------------------
// Interfaces
//--------------------------------------------------------------------------------------------------
// Base class for all D3D9-style resources in the libGcm abstraction.
struct IDirect3DResource9 : public IUnknown
{
IDirect3DDevice9 *m_device; // parent device
D3DRESOURCETYPE m_restype;  // concrete resource type tag
DWORD SetPriority(DWORD PriorityNew);
};
// for the moment, a "D3D surface" is modeled as a GLM tex, a face, and a mip.
struct IDirect3DSurface9 : public IDirect3DResource9
{
// no Create method, these are filled in by the various create surface methods.
HRESULT LockRect(D3DLOCKED_RECT* pLockedRect,CONST RECT* pRect,DWORD Flags);
HRESULT UnlockRect();
HRESULT GetDesc(D3DSURFACE_DESC *pDesc);
// only invoke this on depth/stencil surfaces please...
// axed HRESULT ResetDepthStencilSurfaceSize( int Width, int Height );
D3DSURFACE_DESC m_desc; // Layout must be the same as IDirect3DBaseTexture9!
CPs3gcmTexture *m_tex;  // backing texture; released in dtor only when owned
int m_face;             // cube face (or 0) this surface views
int m_mip;              // mip level this surface views
bool m_bOwnsTexture;    // true when this surface holds the texture reference
~IDirect3DSurface9() { if ( m_bOwnsTexture && m_tex ) m_tex->Release(); }
};
struct IDirect3DBaseTexture9 : public IDirect3DResource9 // "A Texture.."
{
D3DSURFACE_DESC m_descZero; // desc of top level.
CPs3gcmTexture *m_tex; // this object owns data
D3DRESOURCETYPE GetType();
DWORD GetLevelCount();
HRESULT GetLevelDesc(UINT Level,D3DSURFACE_DESC *pDesc);
// Owns m_tex: releases it on destruction.
~IDirect3DBaseTexture9() { if ( m_tex ) m_tex->Release(); }
};
struct IDirect3DTexture9 : public IDirect3DBaseTexture9 // "Texture 2D"
{
//CUtlVector< IDirect3DSurface9* > m_surfs;
IDirect3DSurface9 *m_surfZero; // surf of top level. YUK!!
HRESULT LockRect(UINT Level,D3DLOCKED_RECT* pLockedRect,CONST RECT* pRect,DWORD Flags);
HRESULT UnlockRect(UINT Level);
HRESULT GetSurfaceLevel(UINT Level,IDirect3DSurface9** ppSurfaceLevel);
// Releases the cached top-level surface wrapper.
~IDirect3DTexture9() { if ( m_surfZero ) m_surfZero->Release(); }
};
struct IDirect3DCubeTexture9 : public IDirect3DBaseTexture9 // "Texture Cube Map"
{
IDirect3DSurface9 *m_surfZero[6]; // surfs of top level, one per cube face. YUK!!
HRESULT GetCubeMapSurface(D3DCUBEMAP_FACES FaceType,UINT Level,IDirect3DSurface9** ppCubeMapSurface);
HRESULT GetLevelDesc(UINT Level,D3DSURFACE_DESC *pDesc);
// Releases all six cached face surfaces.
~IDirect3DCubeTexture9() { for ( int j = 0; j < 6; ++ j ) if ( m_surfZero[j] ) m_surfZero[j]->Release(); }
};
struct IDirect3DVolumeTexture9 : public IDirect3DBaseTexture9 // "Texture 3D"
{
IDirect3DSurface9 *m_surfZero; // surf of top level. YUK!!
D3DVOLUME_DESC m_volDescZero; // volume desc top level
HRESULT LockBox(UINT Level,D3DLOCKED_BOX* pLockedVolume,CONST D3DBOX* pBox,DWORD Flags);
HRESULT UnlockBox(UINT Level);
HRESULT GetLevelDesc( UINT level, D3DVOLUME_DESC *pDesc );
// Releases the cached top-level surface wrapper.
~IDirect3DVolumeTexture9() { if ( m_surfZero ) m_surfZero->Release(); }
};
// Top-level D3D9 factory/caps object for the single PS3 "adapter".
struct IDirect3D9 : public IUnknown
{
public:
UINT GetAdapterCount(); //cheese: returns 1
HRESULT GetDeviceCaps (UINT Adapter,D3DDEVTYPE DeviceType,D3DCAPS9* pCaps);
HRESULT GetAdapterIdentifier (UINT Adapter,DWORD Flags,D3DADAPTER_IDENTIFIER9* pIdentifier);
HRESULT CheckDeviceFormat (UINT Adapter,D3DDEVTYPE DeviceType,D3DFORMAT AdapterFormat,DWORD Usage,D3DRESOURCETYPE RType,D3DFORMAT CheckFormat);
UINT GetAdapterModeCount (UINT Adapter,D3DFORMAT Format);
HRESULT EnumAdapterModes (UINT Adapter,D3DFORMAT Format,UINT Mode,D3DDISPLAYMODE* pMode);
HRESULT CheckDeviceType (UINT Adapter,D3DDEVTYPE DevType,D3DFORMAT AdapterFormat,D3DFORMAT BackBufferFormat,BOOL bWindowed);
HRESULT GetAdapterDisplayMode (UINT Adapter,D3DDISPLAYMODE* pMode);
HRESULT CheckDepthStencilMatch (UINT Adapter,D3DDEVTYPE DeviceType,D3DFORMAT AdapterFormat,D3DFORMAT RenderTargetFormat,D3DFORMAT DepthStencilFormat);
HRESULT CheckDeviceMultiSampleType (UINT Adapter,D3DDEVTYPE DeviceType,D3DFORMAT SurfaceFormat,BOOL Windowed,D3DMULTISAMPLE_TYPE MultiSampleType,DWORD* pQualityLevels);
HRESULT CreateDevice (UINT Adapter,D3DDEVTYPE DeviceType,VD3DHWND hFocusWindow,DWORD BehaviorFlags,D3DPRESENT_PARAMETERS* pPresentationParameters,IDirect3DDevice9** ppReturnedDeviceInterface);
};
// Placeholder: swap chains are not used on this platform; the type exists only
// so code written against the D3D9 API continues to compile.
struct IDirect3DSwapChain9 : public IUnknown
{
};
// GPU query object. Occlusion queries map to GCM reports, event (fence)
// queries map to GCM labels; each kind draws its slot from a shared static
// pool (s_GlobalStateOcclusion / s_GlobalStateFence).
struct IDirect3DQuery9 : public IUnknown
{
public:
D3DQUERYTYPE m_type; // D3DQUERYTYPE_OCCLUSION or D3DQUERYTYPE_EVENT
uint32 m_queryIdx;
enum Flags_t
{
kQueryValueMask = 0x0000FFFF, // Mask for query value index
kQueryFinished = 0x80000000, // Query is completed
kQueryUninitialized = 0xFFFFFFFF, // Query hasn't started
};
// Shared pool of GCM report slots used by occlusion queries.
struct QueryGlobalStateOcclusion_t
{
enum { kMaxQueries = GCM_REPORT_QUERY_LAST + 1 - GCM_REPORT_QUERY_FIRST, kGcmQueryBase = GCM_REPORT_QUERY_FIRST };
CellGcmReportData volatile *m_Values[kMaxQueries];
uint32 m_queryIdx;
uint32 PrepareForQuery();
};
static QueryGlobalStateOcclusion_t s_GlobalStateOcclusion;
// Shared pool of GCM label slots used by event/fence queries.
struct QueryGlobalStateFence_t
{
enum { kMaxQueries = GCM_LABEL_QUERY_LAST + 1 - GCM_LABEL_QUERY_FIRST, kGcmLabelBase = GCM_LABEL_QUERY_FIRST };
uint32 volatile *m_Values[kMaxQueries];
uint32 m_queryIdx;
uint32 PrepareForQuery();
};
static QueryGlobalStateFence_t s_GlobalStateFence;
HRESULT Issue(DWORD dwIssueFlags);
HRESULT GetData(void* pData,DWORD dwSize,DWORD dwGetDataFlags);
};
// Common base for vertex/index buffers: owns a refcounted CPs3gcmBuffer and
// provides the D3D-style Lock/Unlock surface over it.
struct IDirect3DGcmBufferBase : public IUnknown
{
public:
CPs3gcmBuffer *m_pBuffer;
HRESULT Lock(UINT OffsetToLock,UINT SizeToLock,void** ppbData,DWORD Flags);
HRESULT Unlock();
// Drops our reference on the underlying GCM buffer.
~IDirect3DGcmBufferBase() { if ( m_pBuffer ) m_pBuffer->Release(); }
};
// Vertex buffer: GCM buffer plus the descriptor D3D callers expect from GetDesc.
struct IDirect3DVertexBuffer9 : public IDirect3DGcmBufferBase
{
public:
D3DVERTEXBUFFER_DESC m_vtxDesc; // to satisfy GetDesc
};
// Index buffer: GCM buffer plus the descriptor D3D callers expect from GetDesc.
struct IDirect3DIndexBuffer9 : public IDirect3DGcmBufferBase
{
public:
D3DINDEXBUFFER_DESC m_idxDesc; // to satisfy GetDesc
HRESULT GetDesc(D3DINDEXBUFFER_DESC *pDesc);
};
// Common base for shader objects: owns a malloc'ed CgBinaryProgram blob and
// exposes accessors for the Cg handle and the embedded microcode.
struct IDirect3DGcmProgramBase : public IUnknown
{
public:
CgBinaryProgram *m_pProgram;
inline CGprogram GetCgProgram() const { return reinterpret_cast< CGprogram >( m_pProgram ); }
// ucode is stored as a byte offset from the start of the binary blob
inline void * GetProgramUCode() const { return (((char*)m_pProgram) + m_pProgram->ucode); }
// Blob was allocated with malloc, so free() (not delete) is correct here.
~IDirect3DGcmProgramBase() { if ( m_pProgram ) free( m_pProgram ); }
};
// define this to find out how many times we reuse the same shader during a frame
//#define DEBUG_GCM_VERTEX_SHADER_USAGE
// Vertex shader object: Cg binary blob (base class) plus platform runtime data.
struct IDirect3DVertexShader9 : public IDirect3DGcmProgramBase
{
public:
VertexShader9Data_t m_data;
//uint32 m_nIoOffsetStart; // the start of subroutine (IO Offset on RSX) that sets this vertex program
~IDirect3DVertexShader9();
};
// Pixel (fragment) shader object. 16-byte aligned because m_data holds data
// consumed by SPU patch jobs. Note: does NOT derive from IDirect3DGcmProgramBase;
// the Cg binary is converted at creation time (see IDirect3DPixelShader9 ctor).
struct IDirect3DPixelShader9 : public CAlignedNewDelete< 16, IUnknown >
{
public:
PixelShader9Data_t m_data;
public:
//inline CgBinaryFragmentProgram *GetFragmentProgram() const { return (CgBinaryFragmentProgram *)(((char*)m_pProgram) + m_pProgram->program); }
//void ValidateAssumptions( const char * pShaderName );
IDirect3DPixelShader9( CgBinaryProgram* prog );
~IDirect3DPixelShader9();
#ifdef _DEBUG
// original Cg binary retained in debug builds for patch validation
CgBinaryProgram *m_pCgProg;
#endif
};
// Matrix stack emulating D3DX's ID3DXMatrixStack for fixed-function-style code.
// Backed by a growable CUtlVector; the top of the stack is the highest index.
struct ID3DXMatrixStack : public IUnknown
{
public:
CUtlVector<D3DMATRIX> m_stack;
int m_stackTop; // top of stack is at the highest index, this is that index. push increases, pop decreases.
HRESULT Create( void );
D3DXMATRIX* GetTop();
void Push();
void Pop();
void LoadIdentity();
void LoadMatrix( const D3DXMATRIX *pMat );
void MultMatrix( const D3DXMATRIX *pMat );
void MultMatrixLocal( const D3DXMATRIX *pMat );
HRESULT ScaleLocal(FLOAT x, FLOAT y, FLOAT z);
// Left multiply the current matrix with the computed rotation
// matrix, counterclockwise about the given axis with the given angle.
// (rotation is about the local origin of the object)
HRESULT RotateAxisLocal(CONST D3DXVECTOR3* pV, FLOAT Angle);
// Left multiply the current matrix with the computed translation
// matrix. (transformation is about the local origin of the object)
HRESULT TranslateLocal(FLOAT x, FLOAT y, FLOAT z);
};
typedef ID3DXMatrixStack* LPD3DXMATRIXSTACK;
// Snapshot of the arguments passed to IDirect3D9::CreateDevice, kept so the
// device can be (re)initialized from the same inputs later.
struct IDirect3DDevice9Params
{
UINT m_adapter;
D3DDEVTYPE m_deviceType;
VD3DHWND m_focusWindow;
DWORD m_behaviorFlags;
D3DPRESENT_PARAMETERS m_presentationParameters;
};
// Current index-buffer binding, recorded by IDirect3DDevice9::SetIndices.
struct D3DIndexDesc
{
IDirect3DIndexBuffer9 *m_idxBuffer;
};
// The device: PS3/GCM implementation of the D3D9 device interface. Tracks the
// currently bound render targets, shaders, vertex declaration and index buffer,
// and translates D3D-style calls into RSX command-buffer traffic.
struct IDirect3DDevice9 : public IUnknown
{
// members
IDirect3DDevice9Params m_params; // mirror of the creation inputs
// D3D flavor stuff
IDirect3DSurface9 *m_rtSurfaces[16]; // current color RT (Render Target) surfaces. [0] is initially == m_defaultColorSurface
IDirect3DSurface9 *m_dsSurface; // current Depth Stencil Render Target surface. can be changed!
IDirect3DSurface9 *m_defaultColorSurface; // default color surface.
IDirect3DSurface9 *m_defaultDepthStencilSurface; // queried by GetDepthStencilSurface.
IDirect3DVertexDeclaration9 *m_vertDecl; // Set by SetVertexDeclaration...
//D3DStreamDesc *m_pVertexStreamSources; // Set by SetStreamSource..
D3DIndexDesc m_indices; // Set by SetIndices..
IDirect3DVertexShader9 *m_vertexShader; // Set by SetVertexShader...
IDirect3DPixelShader9 *m_pixelShader; // Set by SetPixelShader...
#ifdef _DEBUG
uint m_nDrawIndexedPrimitives; // debug draw-call counter
#endif
enum AntiAliasingStatusEnum_t
{
AA_STATUS_NORMAL,
AA_STATUS_PREV_FRAME, // drawing into previous frame, aliased
AA_STATUS_DEFERRED // drawing into deferred queue
};
// this is used to draw UI into already-mlaa'd-surface (to avoid AA'ing the UI)
// when this is on, the default surface to draw should be previous flip surface
AntiAliasingStatusEnum_t m_nAntiAliasingStatus;
// is in logical zpass? logical zpass may have wider scope than spuGcm.zPass, because logical zpass does not abort for any reason. It begins and ends with API calls. Used to balance Perf Marker Push/Pop
bool m_isZPass; //
bool m_isDeferredDrawQueueSurfaceSet;
// methods
// Create call invoked from IDirect3D9
HRESULT Create( IDirect3DDevice9Params *params );
//
// Basics
//
HRESULT Reset(D3DPRESENT_PARAMETERS* pPresentationParameters);
HRESULT SetViewport(CONST D3DVIEWPORT9* pViewport);
HRESULT BeginScene();
HRESULT Clear(DWORD Count,CONST D3DRECT* pRects,DWORD Flags,D3DCOLOR Color,float Z,DWORD Stencil);
HRESULT EndScene();
HRESULT Present(CONST RECT* pSourceRect,CONST RECT* pDestRect,VD3DHWND hDestWindowOverride,CONST RGNDATA* pDirtyRegion);
// textures
HRESULT CreateTexture(UINT Width,UINT Height,UINT Levels,DWORD Usage,D3DFORMAT Format,D3DPOOL Pool,IDirect3DTexture9** ppTexture,VD3DHANDLE* pSharedHandle);
HRESULT CreateCubeTexture(UINT EdgeLength,UINT Levels,DWORD Usage,D3DFORMAT Format,D3DPOOL Pool,IDirect3DCubeTexture9** ppCubeTexture,VD3DHANDLE* pSharedHandle);
HRESULT CreateVolumeTexture(UINT Width,UINT Height,UINT Depth,UINT Levels,DWORD Usage,D3DFORMAT Format,D3DPOOL Pool,IDirect3DVolumeTexture9** ppVolumeTexture,VD3DHANDLE* pSharedHandle);
HRESULT SetTexture(DWORD Stage,IDirect3DBaseTexture9* pTexture);
HRESULT GetTexture(DWORD Stage,IDirect3DBaseTexture9** ppTexture);
// render targets, color and depthstencil, surfaces, blit
HRESULT CreateRenderTarget(UINT Width,UINT Height,D3DFORMAT Format,D3DMULTISAMPLE_TYPE MultiSample,DWORD MultisampleQuality,BOOL Lockable,IDirect3DSurface9** ppSurface,VD3DHANDLE* pSharedHandle);
HRESULT SetRenderTarget(DWORD RenderTargetIndex,IDirect3DSurface9* pRenderTarget);
HRESULT GetRenderTarget(DWORD RenderTargetIndex,IDirect3DSurface9** ppRenderTarget);
HRESULT CreateOffscreenPlainSurface(UINT Width,UINT Height,D3DFORMAT Format,D3DPOOL Pool,IDirect3DSurface9** ppSurface,VD3DHANDLE* pSharedHandle);
HRESULT CreateDepthStencilSurface(UINT Width,UINT Height,D3DFORMAT Format,D3DMULTISAMPLE_TYPE MultiSample,DWORD MultisampleQuality,BOOL Discard,IDirect3DSurface9** ppSurface,VD3DHANDLE* pSharedHandle);
HRESULT SetDepthStencilSurface(IDirect3DSurface9* pNewZStencil);
HRESULT GetDepthStencilSurface(IDirect3DSurface9** ppZStencilSurface);
HRESULT GetRenderTargetData(IDirect3DSurface9* pRenderTarget,IDirect3DSurface9* pDestSurface); // ? is anyone using this ?
HRESULT GetFrontBufferData(UINT iSwapChain,IDirect3DSurface9* pDestSurface);
HRESULT StretchRect(IDirect3DSurface9* pSourceSurface,CONST RECT* pSourceRect,IDirect3DSurface9* pDestSurface,CONST RECT* pDestRect,D3DTEXTUREFILTERTYPE Filter);
// pixel shaders
HRESULT CreatePixelShader(CONST DWORD* pFunction,IDirect3DPixelShader9** ppShader, const char *pShaderName = NULL, char *debugLabel = NULL);
HRESULT SetPixelShader(IDirect3DPixelShader9* pShader);
HRESULT SetPixelShaderConstantF(UINT StartRegister,CONST float* pConstantData,UINT Vector4fCount);
HRESULT SetPixelShaderConstantB(UINT StartRegister,CONST BOOL* pConstantData,UINT BoolCount);
HRESULT SetPixelShaderConstantI(UINT StartRegister,CONST int* pConstantData,UINT Vector4iCount);
// vertex shaders
HRESULT CreateVertexShader(CONST DWORD* pFunction,IDirect3DVertexShader9** ppShader, char *debugLabel = NULL);
HRESULT SetVertexShader(IDirect3DVertexShader9* pShader);
HRESULT SetVertexShaderConstantF(UINT StartRegister,CONST float* pConstantData,UINT Vector4fCount);
HRESULT SetVertexShaderConstantB(UINT StartRegister,CONST BOOL* pConstantData,UINT BoolCount);
HRESULT SetVertexShaderConstantI(UINT StartRegister,CONST int* pConstantData,UINT Vector4iCount);
// vertex buffers
HRESULT CreateVertexDeclaration(CONST D3DVERTEXELEMENT9* pVertexElements,IDirect3DVertexDeclaration9** ppDecl);
HRESULT SetVertexDeclaration(IDirect3DVertexDeclaration9* pDecl);
HRESULT SetFVF(DWORD FVF); // we might not be using these ?
HRESULT GetFVF(DWORD* pFVF);
HRESULT CreateVertexBuffer(UINT Length,DWORD Usage,DWORD FVF,D3DPOOL Pool,IDirect3DVertexBuffer9** ppVertexBuffer,VD3DHANDLE* pSharedHandle);
HRESULT SetStreamSource(UINT StreamNumber,IDirect3DVertexBuffer9* pStreamData,UINT OffsetInBytes,UINT Stride);
HRESULT SetRawHardwareDataStreams( IDirect3DVertexBuffer9** ppRawHardwareDataStreams );
// index buffers
HRESULT CreateIndexBuffer(UINT Length,DWORD Usage,D3DFORMAT Format,D3DPOOL Pool,IDirect3DIndexBuffer9** ppIndexBuffer,VD3DHANDLE* pSharedHandle);
HRESULT SetIndices(IDirect3DIndexBuffer9* pIndexData);
// State management.
HRESULT SetRenderState(D3DRENDERSTATETYPE State,DWORD Value);
HRESULT SetSamplerState(DWORD Sampler,D3DSAMPLERSTATETYPE Type,DWORD Value);
// Draw.
HRESULT ValidateDrawPrimitiveStreams( D3DPRIMITIVETYPE Type, UINT baseVertexIndex, UINT MinVertexIndex, UINT NumVertices, UINT startIndex, UINT primCount ); // validate streams
HRESULT DrawPrimitive(D3DPRIMITIVETYPE PrimitiveType,UINT StartVertex,UINT PrimitiveCount);
void DrawPrimitiveUP(D3DPRIMITIVETYPE PrimitiveType,UINT PrimitiveCount,CONST void *pVertexStreamZeroData,UINT VertexStreamZeroStride);
HRESULT DrawIndexedPrimitive(D3DPRIMITIVETYPE PrimitiveType,INT BaseVertexIndex,UINT MinVertexIndex,UINT NumVertices,UINT startIndex,UINT primCount);
HRESULT DrawIndexedPrimitiveUP(D3DPRIMITIVETYPE PrimitiveType,UINT MinVertexIndex,UINT NumVertices,UINT PrimitiveCount,CONST void* pIndexData,D3DFORMAT IndexDataFormat,CONST void* pVertexStreamZeroData,UINT VertexStreamZeroStride);
// misc
BOOL ShowCursor(BOOL bShow);
HRESULT ValidateDevice(DWORD* pNumPasses);
HRESULT SetMaterial(CONST D3DMATERIAL9* pMaterial);
HRESULT LightEnable(DWORD Index,BOOL Enable);
HRESULT SetScissorRect(CONST RECT* pRect);
HRESULT CreateQuery(D3DQUERYTYPE Type,IDirect3DQuery9** ppQuery);
HRESULT GetDeviceCaps(D3DCAPS9* pCaps);
HRESULT TestCooperativeLevel();
HRESULT EvictManagedResources();
HRESULT SetLight(DWORD Index,CONST D3DLIGHT9*);
void SetGammaRamp(UINT iSwapChain,DWORD Flags,CONST D3DGAMMARAMP* pRamp);
// Talk to JasonM about this one. It's tricky in GL.
HRESULT SetClipPlane(DWORD Index,CONST float* pPlane);
ULONG __stdcall Release();
// Xbox ZPass analogue
void BeginZPass( DWORD Flags );
void SetPredication( DWORD PredicationMask );
HRESULT EndZPass();
// void ReloadZcullMemory( int nStencilRef );
void StartRenderingIntoPreviousFramebuffer();
void AntiAliasingHint( int nHint );
//
//
// **** FIXED FUNCTION STUFF - None of this stuff needs support in GL.
//
//
HRESULT SetTransform(D3DTRANSFORMSTATETYPE State,CONST D3DMATRIX* pMatrix);
HRESULT SetTextureStageState(DWORD Stage,D3DTEXTURESTAGESTATETYPE Type,DWORD Value);
#ifdef _PS3
void GetGPUMemoryStats( GPUMemoryStats &stats ) { return ::GetGPUMemoryStats( stats ); }
void FlushVertexCache();
void FlushTextureCache();
// Allocate storage for a texture's bits (if D3DUSAGE_TEXTURE_NOD3DMEMORY was used to defer allocation on creation)
bool AllocateTextureStorage( IDirect3DBaseTexture9 *pTexture );
protected:
// Flushing changes to GL
void SetVertexStreamSource( uint i, IDirect3DVertexBuffer9* pStreamData,UINT OffsetInBytes,UINT Stride );
void Ps3Helper_ResetSurfaceToKnownDefaultState();
void Ps3Helper_UpdateSurface( int idx );
friend void DxDeviceForceUpdateRenderTarget( );
#endif
};
// Abstract include-handler callback for shader compilation, mirroring D3DX's
// ID3DXInclude contract (Open supplies the file contents, Close releases them).
struct ID3DXInclude
{
virtual HRESULT Open(D3DXINCLUDE_TYPE IncludeType, LPCSTR pFileName, LPCVOID pParentData, LPCVOID *ppData, UINT *pBytes) = 0;
virtual HRESULT Close(LPCVOID pData) = 0;
};
typedef ID3DXInclude* LPD3DXINCLUDE;
// Opaque refcounted byte buffer, mirroring D3DX's ID3DXBuffer (e.g. for
// compiled shader bytecode and compiler error strings).
struct ID3DXBuffer : public IUnknown
{
void* GetBufferPointer();
DWORD GetBufferSize();
};
typedef ID3DXBuffer* LPD3DXBUFFER;
// Stub: constant-table reflection is not implemented on this platform; the
// type only exists to satisfy D3DXCompileShader's signature.
class ID3DXConstantTable : public IUnknown
{
};
typedef ID3DXConstantTable* LPD3DXCONSTANTTABLE;
// ------------------------------------------------------------------------------------------------------------------------------ //
// D3DX stuff.
// ------------------------------------------------------------------------------------------------------------------------------ //
// Free-function subset of the D3DX math/utility API implemented by this layer.
const char* D3DXGetPixelShaderProfile( IDirect3DDevice9 *pDevice );
D3DXMATRIX* D3DXMatrixMultiply( D3DXMATRIX *pOut, CONST D3DXMATRIX *pM1, CONST D3DXMATRIX *pM2 );
D3DXVECTOR3* D3DXVec3TransformCoord( D3DXVECTOR3 *pOut, CONST D3DXVECTOR3 *pV, CONST D3DXMATRIX *pM );
HRESULT D3DXCreateMatrixStack( DWORD Flags, LPD3DXMATRIXSTACK* ppStack);
void D3DXMatrixIdentity( D3DXMATRIX * );
// Component-wise vector difference: *pOut = *pV1 - *pV2. Returns pOut so the
// call can be chained, matching the D3DX convention. Safe if pOut aliases an input.
D3DXINLINE D3DXVECTOR3* D3DXVec3Subtract( D3DXVECTOR3 *pOut, CONST D3DXVECTOR3 *pV1, CONST D3DXVECTOR3 *pV2 )
{
FLOAT dx = pV1->x - pV2->x;
FLOAT dy = pV1->y - pV2->y;
FLOAT dz = pV1->z - pV2->z;
pOut->x = dx;
pOut->y = dy;
pOut->z = dz;
return pOut;
}
// Right-handed cross product: *pOut = *pV1 x *pV2. All three components are
// computed into locals before any store, so pOut may alias either input.
D3DXINLINE D3DXVECTOR3* D3DXVec3Cross( D3DXVECTOR3 *pOut, CONST D3DXVECTOR3 *pV1, CONST D3DXVECTOR3 *pV2 )
{
FLOAT cx = pV1->y * pV2->z - pV1->z * pV2->y;
FLOAT cy = pV1->z * pV2->x - pV1->x * pV2->z;
FLOAT cz = pV1->x * pV2->y - pV1->y * pV2->x;
pOut->x = cx;
pOut->y = cy;
pOut->z = cz;
return pOut;
}
// Scalar dot product of two 3-vectors (x1*x2 + y1*y2 + z1*z2, left-to-right).
D3DXINLINE FLOAT D3DXVec3Dot( CONST D3DXVECTOR3 *pV1, CONST D3DXVECTOR3 *pV2 )
{
FLOAT flSum = pV1->x * pV2->x;
flSum += pV1->y * pV2->y;
flSum += pV1->z * pV2->z;
return flSum;
}
// Remaining D3DX math prototypes, the factory entry point, shader compilation,
// and fake usage flags specific to this PS3 layer.
D3DXMATRIX* D3DXMatrixInverse( D3DXMATRIX *pOut, FLOAT *pDeterminant, CONST D3DXMATRIX *pM );
D3DXMATRIX* D3DXMatrixTranspose( D3DXMATRIX *pOut, CONST D3DXMATRIX *pM );
D3DXPLANE* D3DXPlaneNormalize( D3DXPLANE *pOut, CONST D3DXPLANE *pP);
D3DXVECTOR4* D3DXVec4Transform( D3DXVECTOR4 *pOut, CONST D3DXVECTOR4 *pV, CONST D3DXMATRIX *pM );
D3DXVECTOR4* D3DXVec4Normalize( D3DXVECTOR4 *pOut, CONST D3DXVECTOR4 *pV );
D3DXMATRIX* D3DXMatrixTranslation( D3DXMATRIX *pOut, FLOAT x, FLOAT y, FLOAT z );
// Build an ortho projection matrix. (right-handed)
D3DXMATRIX* D3DXMatrixOrthoOffCenterRH( D3DXMATRIX *pOut, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn,FLOAT zf );
D3DXMATRIX* D3DXMatrixPerspectiveRH( D3DXMATRIX *pOut, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf );
D3DXMATRIX* D3DXMatrixPerspectiveOffCenterRH( D3DXMATRIX *pOut, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf );
// Transform a plane by a matrix. The vector (a,b,c) must be normal.
// M should be the inverse transpose of the transformation desired.
D3DXPLANE* D3DXPlaneTransform( D3DXPLANE *pOut, CONST D3DXPLANE *pP, CONST D3DXMATRIX *pM );
IDirect3D9 *Direct3DCreate9(UINT SDKVersion);
void D3DPERF_SetOptions( DWORD dwOptions );
HRESULT D3DXCompileShader(
LPCSTR pSrcData,
UINT SrcDataLen,
CONST D3DXMACRO* pDefines,
LPD3DXINCLUDE pInclude,
LPCSTR pFunctionName,
LPCSTR pProfile,
DWORD Flags,
LPD3DXBUFFER* ppShader,
LPD3DXBUFFER* ppErrorMsgs,
LPD3DXCONSTANTTABLE* ppConstantTable);
// fake D3D usage constant for SRGB tex creation
#define D3DUSAGE_TEXTURE_SRGB (0x80000000L)
// fake D3D usage constant for deferred tex bits allocation
#define D3DUSAGE_TEXTURE_NOD3DMEMORY (0x40000000L)
// Set by -dxmicroprofile; enables the BindProgram/FpcPatch2 micro-profilers.
extern bool g_bDxMicroProfile;
#endif // DXABSTRACT_H

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,611 @@
//========= Copyright (c) Valve Corporation, All rights reserved. ====//
#include "tier0/platform.h"
#ifdef _PS3
#include "dxabstract.h"
#include <sys/memory.h>
#include "ps3/spugcm_shared.h"
#include "fpcpatcher_spu.h"
#include "cg/cg.h"
#include "cg/cgBinary.h"
#include "vjobs_interface.h"
#include "tier0/hardware_clock_fast.h"
#include "vjobs/fpcpatch_shared.h"
#include "vjobs/root.h"
#include "ps3/vjobutils.h"
#include "tier0/microprofiler.h"
#include "ps3/ps3_gcm_config.h"
#include "spugcm.h"
// Cg profile identifiers emitted by the SCE Cg compiler for RSX programs.
enum
{
PROFILE_SCE_VP_RSX = 7003,
PROFILE_SCE_FP_RSX = 7004
};
// Wrap a libgcm call; hard-fail with file/line context on any non-CELL_OK result.
#define GCM_MUST_SUCCEED( FUNC, ... ) do { int nError = FUNC(__VA_ARGS__); if( nError != CELL_OK ) { Error( "Error 0x%X in " #FUNC ", %s:%d\n", nError, __FILE__, __LINE__ ); } } while( 0 )
DEFINE_LOGGING_CHANNEL_NO_TAGS( LOG_VJOBS, "VJOBS" );
// Singleton that patches fragment-program constants into RSX ucode via SPU jobs.
CFragmentProgramConstantPatcher_SPU g_pixelShaderPatcher; // Patches pixel shader constants
// Micro-profiler accumulators for BindProgram()/FpcPatch2(); dumped in EndScene().
CMicroProfiler g_mpBindProgram, g_mpFpcPatch2;
// debug only
// Constructor: null out buffer state; the real setup is deferred to InitLocal().
CFragmentProgramConstantPatcher_SPU::CFragmentProgramConstantPatcher_SPU()
{
m_pBuffer = m_pBufferEnd = NULL;
m_nIoOffsetDelta = 0; // m_pBuffer + m_nIoOffsetDelta == IO offset usable by RSX
m_pPutFragmentProgram = NULL;
#ifdef DEBUG_FPC_PATCHER
// -fpcpsync forces fully synchronous patching (PPU waits on each SPU job)
m_bSync = ( CommandLine()->FindParm( "-fpcpsync" ) != 0 );
#endif
}
// Initialize with a caller-provided buffer (expected to be RSX local memory)
// that receives patched fragment-program ucode, and allocate the shared
// constant-journal state consumed by SPU patch jobs.
// pBuffer/nSize: destination ucode buffer; must be 128-byte aligned and
// contiguously mappable via cellGcmAddressToOffset (asserted below).
void CFragmentProgramConstantPatcher_SPU::InitLocal( void *pBuffer, uint nSize )
{
m_nFpcPatchCounter = 0;
m_nFpcPatchCounterOfLastSyncJob = 0;
//cellGcmSetDebugOutputLevel( CELL_GCM_DEBUG_LEVEL2 );
// NOTE(review): nOverfetchGuard is currently unused here — presumably the
// guard space is accounted for by the caller sizing pBuffer; confirm.
const uint nOverfetchGuard = 1024; // RSX front end prefetches up to 4k, but 1k is ( should be ) enough to avoid overfetch crashes
const uint nStateBufferQwords = 1 << 12; // make space for at least 8 full batches of constants...
uint nPatchStateBufferSize = ( sizeof( job_fpcpatch::FpcPatchState_t ) + sizeof( fltx4 ) * nStateBufferQwords );
uint32 nBufferIoOffset;
m_bFpcPatchOnPpu = ( 0 != CommandLine()->FindParm( "-fpcpatchonppu" ) );
#ifdef DEBUG_FPC_PATCHER
m_bTestAlwaysStateSync = ( 0 != CommandLine()->FindParm( "-fpcpstatesync" ) );
#endif
m_bEnableSPU = true;
m_nFpcPatchSyncMask = 0;
// use this passed buffer (probably from local memory) for the patched stuff
m_pBuffer = ( uint32* ) pBuffer;
m_pBufferEnd = ( uint32* ) ( uintp( pBuffer ) + nSize );
m_nBufferLocation = CELL_GCM_LOCATION_LOCAL;
m_isBufferPassedIn = true;
// shared state header + journal ring, 128-byte aligned for SPU DMA
m_state.Init( ( job_fpcpatch::FpcPatchState_t* )MemAlloc_AllocAligned( nPatchStateBufferSize, 128 ), nStateBufferQwords );
GCM_MUST_SUCCEED( cellGcmAddressToOffset, m_pBuffer, &nBufferIoOffset );
#ifdef DBGFLAG_ASSERT
// verify the buffer maps to a contiguous, 128-byte-aligned IO range
uint32 nBufferIoOffsetCheck;
GCM_MUST_SUCCEED( cellGcmAddressToOffset, m_pBuffer, &nBufferIoOffsetCheck );
Assert( nBufferIoOffsetCheck == nBufferIoOffset );
Assert( !( nBufferIoOffsetCheck & 0x7F ) );
for( uint nOffset = 0; nOffset < nSize; nOffset += 128 )
{
GCM_MUST_SUCCEED( cellGcmAddressToOffset, ((uint8*)m_pBuffer) + nOffset, &nBufferIoOffsetCheck );
Assert( nBufferIoOffsetCheck == nBufferIoOffset + nOffset );
}
#endif
m_nIoOffsetDelta = nBufferIoOffset - uintp( m_pBuffer );
#ifdef DEBUG_FPC_PATCHER
// shadow copy of the virtual constant file, pre-filled with 0xCD poison
m_pSyncState = ( fltx4* ) MemAlloc_AllocAligned( sizeof( fltx4 ) * job_fpcpatch::MAX_VIRTUAL_CONST_COUNT, 16 );
V_memset( m_pSyncState, 0xCD, sizeof( fltx4 ) * job_fpcpatch::MAX_VIRTUAL_CONST_COUNT );
V_memset( m_state.m_pSharedState->m_reg, 0xCD, sizeof( fltx4 ) * job_fpcpatch::MAX_VIRTUAL_CONST_COUNT );
#endif
ResetPut();
//cellGcmSetDebugOutputLevel( CELL_GCM_DEBUG_LEVEL0 );
}
// Intentionally empty: buffers are released in the destructor, not here.
void CFragmentProgramConstantPatcher_SPU::Shutdown()
{
}
// Park the patched-ucode write cursor at the end of the buffer; the next
// BindProgram() will lock a fresh range from the fpcp ring.
void CFragmentProgramConstantPatcher_SPU::ResetPut()
{
m_pPutFragmentProgram = m_pBufferEnd; // reserved word for the count of constants to set
}
// Destructor: release whichever allocations this object owns. With InitLocal()
// (m_isBufferPassedIn == true) we own only the shared-state block, not m_pBuffer.
CFragmentProgramConstantPatcher_SPU::~CFragmentProgramConstantPatcher_SPU()
{
if( m_isBufferPassedIn )
{
MemAlloc_FreeAligned( m_state.m_pSharedState );
}
else
{
// NOTE(review): this path frees m_pBuffer but not m_state.m_pSharedState —
// presumably the non-passed-in init (not visible here) carves shared state
// out of the same sys_memory allocation; verify against that init path.
sys_memory_free( ( sys_addr_t )m_pBuffer );
}
#ifdef DEBUG_FPC_PATCHER
MemAlloc_FreeAligned( m_pSyncState );
#endif
}
// Frame start: snapshot the patch counter for per-frame stats and sanity-check
// that the journal read/write indices are within one ring length of each other.
void CFragmentProgramConstantPatcher_SPU::BeginScene()
{
m_nFpcPatchCounterAtBeginScene = m_nFpcPatchCounter;
// we shouldn't have in-flight SPU jobs by now.. should we?
Assert( uint( g_spuGcmShared.m_nFpcpStartRangesAfterLastSync - m_state.m_pSharedState->m_nStartRanges ) <= m_state.m_pSharedState->m_nBufferMask + 1 );
}
// Frame end: if micro-profiling is enabled (-dxmicroprofile) and any patches
// happened this frame, dump and reset the BindProgram/FpcPatch2 timers.
void CFragmentProgramConstantPatcher_SPU::EndScene()
{
#if ENABLE_MICRO_PROFILER > 0
uint nPatchCounter = m_nFpcPatchCounter - m_nFpcPatchCounterAtBeginScene;
extern bool g_bDxMicroProfile;
if( g_bDxMicroProfile && nPatchCounter )
{
g_mpBindProgram.PrintAndReset( "[BindProgram] " );
g_mpFpcPatch2 .PrintAndReset( "[FpcPatch2] " );
}
#endif
}
// Empty fragment-program header: used to issue a state-sync-only FpcPatch2 job
// (no ucode to patch, just flush the constant journal to the shared state).
job_fpcpatch2::FpHeader_t g_nullFpHeader = {0,0,0,0};
// semantics should match cgGLSetFragmentRegisterBlock()
// Stage a block of fragment-program constants (semantics match
// cgGLSetFragmentRegisterBlock()). The constants are appended to the shared
// journal ring that SPU patch jobs (FpcPatch2) later fold into shader ucode;
// may spin-wait (with a stall warning) when the ring is full.
//   nStartRegister / nVector4fCount: destination range in the virtual constant file
//   pConstantData: nVector4fCount float4s to store
// Fix vs. original: the DEBUG_FPC_PATCHER block below referenced `bPrePatch`,
// a local that only exists inside the commented-out prepatch path, so any
// DEBUG_FPC_PATCHER build failed to compile; the prepatch path being disabled
// means bPrePatch was always false, so the condition reduces to
// m_bTestAlwaysStateSync. Also removed the unused debug local `nEnd`.
void CFragmentProgramConstantPatcher_SPU::SetFragmentRegisterBlock( uint nStartRegister, uint nVector4fCount, const float * pConstantData )
{
#ifndef _CERT
// validate the register window against the virtual constant file size
if ( nStartRegister >= job_fpcpatch::MAX_VIRTUAL_CONST_COUNT || nStartRegister + nVector4fCount > job_fpcpatch::MAX_VIRTUAL_CONST_COUNT )
Error( "Invalid Fragment Register Block Range %u..%u\n", nStartRegister, nStartRegister + nVector4fCount );
#endif
#ifdef DEBUG_FPC_PATCHER
if( m_bSync )
{
// synchronous mode: SPU-side register state must match our shadow copy
fltx4 reg[job_fpcpatch::MAX_VIRTUAL_CONST_COUNT];
m_state.GetSyncState( reg );
Assert( !V_memcmp( m_pSyncState, reg, sizeof( fltx4 ) * job_fpcpatch::MAX_VIRTUAL_CONST_COUNT ) );
}
#endif
// we have 4 DMA elements ( 2..6 ) to fit the constant buffer; the 1st element may have to be as small as 16 bytes.
// this leaves the max constant buffer size 4 * 16kb + 16 bytes
const uint nMaxUploadRangeBeforeStateSync = ( 32 * 1024 ) / sizeof( fltx4 );
uint numUploadRangeQwords = m_state.m_nEndOfJournalIdx - g_spuGcmShared.m_nFpcpStartRangesAfterLastSync;
( void )nMaxUploadRangeBeforeStateSync; ( void )numUploadRangeQwords; // only consumed by the disabled prepatch path below
///////////////////////////////////////////////////////////////////////////
//
// PREPATCH MUST BE DONE IN (CTXFLUSH OR) DRAW JOB FROM NOW ON!!! g_spuGcmShared.m_nFpcpStartRangesAfterLastSync IS SYNCHRONOUS AND CORRECT THERE
//
//////////////////////////////////////////////////////////////////////////
/*
bool bPrePatch = nVector4fCount + 1 + numUploadRangeQwords > nMaxUploadRangeBeforeStateSync;
if( bPrePatch )
{
// force state sync now
if( g_spuGcmShared.m_enableStallWarnings )
{
Warning( "PPU-SPU Wait for RSX. SetFragmentRegisterBlock: Forced to set state on PPU, %u vectors, %u qwords in history. This is slow fallback path.\n", nVector4fCount, numUploadRangeQwords );
}
FpcPatch2( &g_nullFpHeader, sizeof( g_nullFpHeader ), NULL, NULL );
}
*/
// append to the journal; a non-zero return is the number of spins we stalled
// waiting for SPU/RSX to drain the ring
if( uint nAttempts = m_state.AddRange( nStartRegister, nVector4fCount, pConstantData ) )
{
if( g_spuGcmShared.m_enableStallWarnings )
{
Warning( "PPU-SPU Wait for RSX. SetFragmentRegisterBlock: Stall, %d spins. Waiting for more memory; %d qwords, %d jobs buffered up\n", nAttempts, m_state.m_nEndOfJournalIdx - m_state.m_pSharedState->m_nStartRanges, g_spuGcmShared.m_nFpcPatchCounter - m_state.m_pSharedState->m_nThisStatePatchCounter );
}
}
#ifdef DEBUG_FPC_PATCHER
// the prepatch path above is disabled, so the old "&& !bPrePatch" qualifier
// is always true; referencing bPrePatch here broke DEBUG_FPC_PATCHER builds
if( m_bTestAlwaysStateSync )
{
FpcPatch2( &g_nullFpHeader, sizeof( g_nullFpHeader ), NULL, NULL );
}
// mirror the write into the PPU shadow copy, then re-verify in sync mode
V_memcpy( m_pSyncState + nStartRegister, pConstantData, nVector4fCount * sizeof( fltx4 ) );
if( m_bSync )
{
fltx4 reg[job_fpcpatch::MAX_VIRTUAL_CONST_COUNT];
m_state.GetSyncState( reg );
Assert( !V_memcmp( m_pSyncState, reg, sizeof( fltx4 ) * job_fpcpatch::MAX_VIRTUAL_CONST_COUNT ) );
}
#endif
}
//volatile int g_nDebugStage = 0;
//
// Match the semantics of cgGLBindProgram()
// There are 2 formats of fragment shaders, see SDK docs "2. 2 Cg Compiler Options" and
// in Cg Compiler User's Guide:
// "7. 2 NV Binary Shader Format (VPO and FPO)"
// "7. 4 Cgb File Format Specification"
//
// Bind a pixel shader: lock a fresh ucode range in the fpcp ring, kick an SPU
// job (FpcPatch2) to patch the shader's constants into that range, and emit
// the RSX commands that select the patched program for subsequent draws.
void CFragmentProgramConstantPatcher_SPU::BindProgram( const struct IDirect3DPixelShader9 * psh )
{
MICRO_PROFILE( g_mpBindProgram );
const job_fpcpatch2::FpHeader_t * prog = psh->m_data.m_eaFp;
uint32 nFragmentProgramOffset = uintp( m_pPutFragmentProgram ) + m_nIoOffsetDelta;
// release the previous range to the SPU ring, then lock space for this ucode
g_spuGcmShared.m_fpcpRing.UnlockRsxMemoryForSpu();
m_pPutFragmentProgram = ( uint32* )g_spuGcmShared.m_fpcpRing.LockRsxMemoryForSpu( &g_spuGcmShared.m_fpcpJobChain, prog->m_nUcodeSize );
// recompute the RSX IO offset from the local-memory base
nFragmentProgramOffset = uintp( m_pPutFragmentProgram ) - uintp( g_ps3gcmGlobalState.m_pLocalBaseAddress );
if( !IsCert() && nFragmentProgramOffset >= g_ps3gcmGlobalState.m_nLocalSize )
{
Error( "Fragment program Ucode buffer offset 0x%X is at unexpected address not in local memory\n", nFragmentProgramOffset );
}
if ( !IsCert() && ( m_pPutFragmentProgram < m_pBuffer || m_pPutFragmentProgram >= m_pBufferEnd ) )
{
Error( "Fragment Program UCode buffer overflow.\n" );
}
#ifdef DEBUG_FPC_PATCHER
if( m_bSync )
{
fltx4 reg[job_fpcpatch::MAX_VIRTUAL_CONST_COUNT];
m_state.GetSyncState( reg );
Assert( !V_memcmp( m_pSyncState, reg, sizeof( fltx4 ) * job_fpcpatch::MAX_VIRTUAL_CONST_COUNT ) );
}
#endif
uint nTexControls = prog->m_nTexControls;
// set jump to self
GCM_CTX_RESERVE( 7 + 2 * nTexControls );
uint32 * pJts = NULL;
// kick the SPU job that patches constants into m_pPutFragmentProgram
FpcPatch2( prog, psh->m_data.m_nFpDmaSize, m_pPutFragmentProgram, pJts );
CELL_GCM_METHOD_SET_SHADER_CONTROL( GCM_CTX->current, prog->m_nShaderControl0 ); // +2
CELL_GCM_METHOD_SET_SHADER_PROGRAM( GCM_CTX->current, m_nBufferLocation + 1, ( nFragmentProgramOffset & 0x1fffffff ) ); // +2
CELL_GCM_METHOD_SET_VERTEX_ATTRIB_OUTPUT_MASK( GCM_CTX->current, psh->m_data.m_attributeInputMask | 0x20 ); // +2
// copy the shader's texture-control method pairs straight into the command buffer
V_memcpy( GCM_CTX->current, prog->GetTexControls(), nTexControls * sizeof( uint32 ) * 2 );
GCM_CTX->current += 2 * nTexControls;
#ifdef DEBUG_FPC_PATCHER
if( m_bSync )
{
// synchronous debugging: flush, spin until the SPU clears the JTS,
// then re-validate the patched ucode against the PPU shadow state
g_ps3gcmGlobalState.CmdBufferFlush( CPs3gcmGlobalState::kFlushForcefully );
while ( *( volatile uint32* )pJts )
{
sys_timer_usleep( 50 );// wait for nop
}
#ifdef DEBUG_FPC_PATCHER
{
fltx4 reg[job_fpcpatch::MAX_VIRTUAL_CONST_COUNT];
m_state.GetSyncState( reg );
Assert( !V_memcmp( m_pSyncState, reg, sizeof( fltx4 ) * job_fpcpatch::MAX_VIRTUAL_CONST_COUNT ) );
}
ValidatePatchedProgram( psh->m_pCgProg, m_pPutFragmentProgram );
uint32 nFragmentProgramOffsetCheck;
GCM_MUST_SUCCEED( cellGcmAddressToOffset, m_pPutFragmentProgram, &nFragmentProgramOffsetCheck );
Assert( nFragmentProgramOffsetCheck == nFragmentProgramOffset );
#endif
g_ps3gcmGlobalState.CmdBufferFinish();
}
#endif
m_nFpcPatchCounter++;
}
uint g_nFpcPatch2JobExtraFlags = 0; // set this to 2 and SPU will break
static int s_nFpcPatch2Calls = 0; // debug: total FpcPatch2 job submissions
// Build and submit one SPURS job (job_fpcpatch2) that applies the journaled
// constant ranges to a fragment program's ucode.
//   prog / nFpDmaSize: fragment-program header blob to DMA to the SPU
//   pPatchedProgram: destination ucode in RSX-visible memory; NULL = state-sync only
//   pJts: optional jump-to-self word the SPU clears when done; may be NULL
// The job is made synchronous (stalls successors, writes state back) when there
// is no patch target or the pending range is large.
void CFragmentProgramConstantPatcher_SPU::FpcPatch2( const job_fpcpatch2::FpHeader_t * prog, uint nFpDmaSize, void *pPatchedProgram, uint32 * pJts )
{
MICRO_PROFILE( g_mpFpcPatch2 );
#ifdef VJOBS_ON_SPURS
VjobChain3 &jobChain = g_spuGcm.m_jobSink;
uint32 nUCodeSize = prog->m_nUcodeSize;
CellSpursJob128 * pJob = g_spuGcm.m_jobPool128.Alloc( *g_spuGcm.m_pRoot->m_pFpcPatch2 );
Assert( pJob->header.sizeDmaList == 0 && pJob->header.sizeInOrInOut == 0 ); // the default MUST always be 1
pJob->header.useInOutBuffer = 1;
// DMA elements 0..1: program header and shared state
CDmaListConstructor dmaConstructor( pJob->workArea.dmaList );
dmaConstructor.AddInputDma( nFpDmaSize, prog );
dmaConstructor.AddInputDma( sizeof( *m_state.m_pSharedState ), ( void* )m_state.m_pSharedState );
// the g_spuGcmShared.m_nFpcpStartRangesAfterLastSync runs ahead of m_state.m_pSharedState->m_nStartRanges , because it's a PREDICTED
// start of range. It'll be absolutely in-sync with m_state.m_pSharedState->m_nStartRanges if we run SPUs synchronously
#ifdef DBGFLAG_ASSERT
uint nSharedStateStartRanges = m_state.m_pSharedState->m_nStartRanges;
#endif
// NOTE: if the asserts below fire, it may be due to invalid value in nSharedStateStartRanges because SPU DMAs stuff right down to m_state.m_pSharedState and it's changing while this code executes
Assert( uint( m_state.m_nEndOfJournalIdx - nSharedStateStartRanges ) <= m_state.m_pSharedState->m_nBufferMask + 1 );
Assert( uint( g_spuGcmShared.m_nFpcpStartRangesAfterLastSync - nSharedStateStartRanges ) <= uint( m_state.m_nEndOfJournalIdx - nSharedStateStartRanges ) );
uint nStartOfJournal = /*nSharedStateStartRanges*/g_spuGcmShared.m_nFpcpStartRangesAfterLastSync, nBufferMask = m_state.m_pSharedState->m_nBufferMask;
// we have 4 DMA elements ( 2..6 ) to fit the constant buffer; the 1st element may have to be as small as 16 bytes.
// this leaves the max constant buffer size 4 * 16kb + 16 bytes
const uint numRangeQwords = ( m_state.m_nEndOfJournalIdx - nStartOfJournal );
Assert( numRangeQwords <= nBufferMask + 1 );
if ( numRangeQwords != 0 )
{
// journal range may wrap the ring: one or two DMA spans
uint nEndOfSpan0 = ( nStartOfJournal + nBufferMask + 1 ) & ~nBufferMask;
if ( ( signed int )( nEndOfSpan0 - m_state.m_nEndOfJournalIdx ) >= 0 )
{
//numRangeQwords = ( m_state.m_nEndOfJournalIdx - nStartOfJournal );
dmaConstructor.AddInputDmaLarge( ( numRangeQwords ) * sizeof( fltx4 ), m_state.m_pSharedState->GetBufferStart() + ( nStartOfJournal & nBufferMask ) );
}
else
{
//numRangeQwords = nFirstRange + nSecondRange ;
dmaConstructor.AddInputDmaLarge( ( nEndOfSpan0 - nStartOfJournal ) * sizeof( fltx4 ), m_state.m_pSharedState->GetBufferStart() + ( nStartOfJournal & nBufferMask ) );
dmaConstructor.AddInputDmaLarge( ( m_state.m_nEndOfJournalIdx - nEndOfSpan0 ) * sizeof( fltx4 ), m_state.m_pSharedState->GetBufferStart() );
}
}
else
{
dmaConstructor.AddSizeInOrInOut( 16 ); // we need at least 16 bytes in the ranges area for temporary storage
}
dmaConstructor.FinishIoBuffer( &pJob->header );
if( pJob->header.sizeDmaList > 7 * sizeof( uint64 ) )
{
Error( "FpcPatch2: DMA list size out of range (%d). job_fpcpatch2 parameters won't fit. numRangeQwords = %d\n", pJob->header.sizeDmaList, numRangeQwords );
}
// IMPORTANT: make it always synchronous , in case we don't have the target to patch. The only reason for this job to exist is to make it synchronous
// Also, if the range is large, still make it synchronous, to avoid subsequent jobs doing a lot of computations in vein
uint nAsync = !pPatchedProgram || numRangeQwords >= 1024 ? 0 : ( ( m_nFpcPatchCounter ) & m_nFpcPatchSyncMask ) ;
// job parameters ride in dmaList slots 7..9 (counters, target, journal window)
dmaConstructor[7][0] = m_nFpcPatchCounterOfLastSyncJob;
dmaConstructor[7][1] = m_nFpcPatchCounter;
dmaConstructor[8][0] = ( uint32 ) pPatchedProgram;
dmaConstructor[8][1] = uintp( pJts ); // the SPU->RSX dma element; may be NULL
dmaConstructor[9][0] = m_state.m_nEndOfJournalIdx;
dmaConstructor[9][1] = ( uint32 ) nStartOfJournal;
if( !IsCert() )
{
pJob->header.jobType |= CELL_SPURS_JOB_TYPE_MEMORY_CHECK;
}
dmaConstructor[8][0] |= g_nFpcPatch2JobExtraFlags;
if ( !nAsync )
{
// synchronous job: SPU writes its state back and stalls successor jobs;
// our predicted journal start catches up to the end of the journal
dmaConstructor[8][0] |= job_fpcpatch::FLAG_PUT_STATE;
m_nFpcPatchCounterOfLastSyncJob = m_nFpcPatchCounter;
pJob->header.jobType |= CELL_SPURS_JOB_TYPE_STALL_SUCCESSOR;
g_spuGcmShared.m_nFpcpStartRangesAfterLastSync = m_state.m_nEndOfJournalIdx;
}
#ifdef DBGFLAG_ASSERT
int nError = cellSpursCheckJob( ( const CellSpursJob256* )pJob, sizeof( *pJob ), 256 );
static int s_nJobErrors = 0;
if( CELL_OK != nError )
{
++s_nJobErrors;
}
#endif
if ( !nAsync )
{
jobChain.PushSyncJobSync( CELL_SPURS_JOB_COMMAND_JOB( pJob ) );
}
else
{
jobChain.Push( CELL_SPURS_JOB_COMMAND_JOB( pJob ) );
}
#ifdef DEBUG_FPC_PATCHER
if( m_bSync )
{
// spin until the SPU clears the JTS and the job binary is released
if( pJts )
{
volatile uint32 * pJts2 = pJts;
while( *pJts2 )
continue;
}
volatile uint64_t * pEaJob = &pJob->header.eaBinary;
while( * pEaJob )
continue;
}
#endif
s_nFpcPatch2Calls++;
#endif
}
#ifdef DEBUG_FPC_PATCHER
extern void PatchUcodeConstSwap( uint32 * pDestination, const uint32 * pSource, int nLength );
extern uint fspatchGetLength( CGtype nType );
uint32 g_nConstLengthCounter[5] = { 0, 0, 0, 0, 0 }; // histogram of embedded-constant lengths (debug stat)
// Debug cross-check: re-patch the original Cg binary's embedded constants on
// the PPU from the shadow state (m_pSyncState) and assert the result matches
// the ucode the SPU job produced (pPatchedUcode), byte for byte.
void CFragmentProgramConstantPatcher_SPU::ValidatePatchedProgram( const CgBinaryProgram * prog, void * pPatchedUcode )
{
Assert( prog->profile == PROFILE_SCE_FP_RSX && prog->binaryFormatRevision == CG_BINARY_FORMAT_REVISION );
uint32 nUCodeSize = prog->ucodeSize;
// scratch copy of the unpatched ucode to patch on the PPU
void * pUcode = stackalloc( nUCodeSize );
void * pSourceUcode = ( ( uint8* ) prog ) + prog->ucode;
V_memcpy( pUcode, ( ( uint8* ) prog ) + prog->ucode, nUCodeSize );
CgBinaryParameter * pParameters = ( CgBinaryParameter * )( uintp( prog ) + prog->parameterArray ) ;
uint32 * pPatchDestination = NULL;
Assert( cellGcmCgGetCountParameter( ( CGprogram ) prog ) == prog->parameterCount );
for ( int nPar = 0; nPar < prog->parameterCount; ++nPar )
{
CgBinaryParameter * pPar = pParameters + nPar;
Assert( pPar == ( CgBinaryParameter * ) cellGcmCgGetIndexParameter( ( CGprogram ) prog, nPar ) );
#ifdef DBGFLAG_ASSERT
const char * pLeafName = ( const char * )( uintp( prog ) + pPar->name );
( void )pLeafName;
uint32 * pDefault = pPar->defaultValue ? ( uint32* )( uintp( prog ) + pPar->defaultValue ) : NULL ;
#endif
if ( pPar->embeddedConst )
{
Assert( pPar->res == CG_C && pPar->var == CG_UNIFORM ); // this MUST be a uniform constant.. at least I think that's the only kind we need to patch
const CgBinaryEmbeddedConstant * pEmbedded = ( const CgBinaryEmbeddedConstant* )( uintp( prog ) + pPar->embeddedConst );
int nLength = fspatchGetLength( pPar->type );
g_nConstLengthCounter[nLength] ++;
// a parameter may be embedded at multiple ucode locations
for ( uint nEm = 0; nEm < pEmbedded->ucodeCount; ++ nEm )
{
uint ucodeOffset = pEmbedded->ucodeOffset[nEm]; // is this the offset from prog structure start?
Assert( ucodeOffset < nUCodeSize - 4 );
#ifdef DBGFLAG_ASSERT
Assert( cellGcmCgGetEmbeddedConstantOffset( ( CGprogram ) prog, ( CGparameter ) pPar, nEm ) == ucodeOffset );
const float * pDefaultCheck = cellGcmCgGetParameterValues( ( CGprogram ) prog, ( CGparameter ) pPar );
Assert( pDefault == ( uint32* ) pDefaultCheck );
uint32 * pUcodeEmConst = ( uint32* )( uintp( pSourceUcode ) + ucodeOffset );
Assert( !pDefault || !V_memcmp( pDefault, pUcodeEmConst, nLength * 4 ) );
#endif
// patch from shadow state, then compare against the SPU-patched copy
pPatchDestination = ( uint32* )( uintp( pUcode ) + ucodeOffset );
uint32 * pPatchedCheck = ( uint32* )( uintp( pPatchedUcode ) + ucodeOffset );
PatchUcodeConstSwap( pPatchDestination, ( uint32* ) & ( m_pSyncState[pPar->resIndex] ), nLength );
Assert( !V_memcmp( pPatchDestination, pPatchedCheck, nLength * 4 ) );
}
}
}
Assert( !V_memcmp( pPatchedUcode, pUcode, nUCodeSize ) );
}
#endif
// Binds this PPU-side wrapper to the SPU-shared state block and resets the journal ring.
// nBufferQwords is the ring size in qwords (used as a power-of-two mask, so it must be a power of 2).
void FpcPatchState::Init( job_fpcpatch::FpcPatchState_t * pSharedState, uint32 nBufferQwords )
{
#ifdef _DEBUG
	//m_nRangesAdded = 0;
#endif
	// Compute the PPU-side mirror values first...
	m_nBufferMask = nBufferQwords - 1;
	// Non-cert builds start the journal near the wrap point — presumably to exercise
	// ring wraparound early; TODO confirm intent.
	m_nEndOfJournalIdx = IsCert() ? 0 : nBufferQwords - 128;
	m_pSharedState = pSharedState;

	// ...then publish the same values into the shared state the SPU job reads.
	pSharedState->m_nBufferMask = m_nBufferMask;
	pSharedState->m_nStartRanges = m_nEndOfJournalIdx;
	pSharedState->m_eaThis = pSharedState;
	pSharedState->m_nThisStatePatchCounter = 0;
	pSharedState->m_nDebuggerBreak = 0;
}
//--------------------------------------------------------------------------------------------------
// Reconstructs the full virtual constant register file on the PPU: copies the register snapshot
// from shared state, then replays every journal entry not yet consumed by the SPU — from the
// shared m_nStartRanges up to the PPU-side end of journal.
//--------------------------------------------------------------------------------------------------
void FpcPatchState::GetSyncState( fltx4 * pRegisters )
{
	V_memcpy( pRegisters, m_pSharedState->m_reg, job_fpcpatch:: MAX_VIRTUAL_CONST_COUNT * sizeof( fltx4 ) );
	for( uint nJournalIdx = m_pSharedState->m_nStartRanges; nJournalIdx < m_nEndOfJournalIdx ; )
	{
		// Each journal entry is one ConstRangeHeader_t qword followed by m_nCount constant qwords;
		// indices wrap through the ring via m_nBufferMask.
		job_fpcpatch:: ConstRangeHeader_t & range = ((job_fpcpatch::ConstRangeHeader_t*)m_pSharedState->GetBufferStart())[ nJournalIdx & m_pSharedState->m_nBufferMask ];
		nJournalIdx++; // step past the header to the constant payload
		for( uint nConstIdx = 0 ; nConstIdx < range.m_u32.m_nCount; ++nConstIdx, ++nJournalIdx )
		{
			pRegisters[ range.m_u32.m_nStart + nConstIdx ] = m_pSharedState->GetBufferStart()[nJournalIdx & m_pSharedState->m_nBufferMask ];
		}
	}
}
/*
void FpcPatchState::Reset()
{
m_nEndOfJournalIdx = 0;
m_pSharedState->m_nStartRanges = 0;
}
*/
#ifdef _DEBUG
// Set from the debugger: break inside AddRange when the watched constant index is written.
static int s_nDebugRangeAdd = -1, s_nDebugSetConst = -1;
#endif
//--------------------------------------------------------------------------------------------------
// Appends one constant range (a ConstRangeHeader_t qword followed by nCount register qwords) to
// the journal ring shared with the SPU. Spin-waits until the SPU consumer has freed enough space.
// Returns the number of wait iterations (callers use a non-zero return to report the stall).
//--------------------------------------------------------------------------------------------------
uint FpcPatchState::AddRange( uint32 nStart, uint32 nCount, const float * pData )
{
#ifndef _CERT
	if( nStart + nCount > job_fpcpatch::MAX_VIRTUAL_CONST_COUNT )
	{
		Error( "AddRange(%d..%d) out of range <%d\n", nStart, nCount, int( job_fpcpatch::MAX_VIRTUAL_CONST_COUNT ) );
	}
#endif
#ifdef _DEBUG
	//Assert( s_nDebugRangeAdd != m_nRangesAdded );
	// Debug hook: break when the watched constant (s_nDebugSetConst) falls inside this range.
	if( int( s_nDebugSetConst - nStart ) >= 0 && int( s_nDebugSetConst - nStart ) < int( nCount ) )
	{
		fltx4 flDebugRegister = LoadUnalignedSIMD( pData + 4 * int( s_nDebugSetConst - nStart ) );
		DebuggerBreak();
	}
	//++m_nRangesAdded;
#endif
	// spin-wait, then V_memcpy range
	COMPILE_TIME_ASSERT( sizeof( job_fpcpatch::ConstRangeHeader_t ) == 16 );
	const uint nSpins = 0x1FF;
	Assert( !( nSpins & ( nSpins + 1 ) ) ); // nSpins must be 2^k-1 (it was used as a spin mask; see commented-out check below)
	//
	// We need space for nCount + 1 QWords (1 Qword for the ConstRangeHeader_t)
	// And we need m_nEndOfJournalIdx != m_nStartRanges to distinguish between
	// the all-empty and all-full buffers
	//
	uint nAttempts = 0;
	for ( ; ; ++nAttempts )
	{
		uint32 nStartRanges = m_pSharedState->m_nStartRanges; // consumer progress, advanced by the SPU job
		Assert( uint32( m_nEndOfJournalIdx - nStartRanges ) <= m_nBufferMask + 1 );
		// compute the new end - start; is it running further than buffer size away?
		if ( ( m_nEndOfJournalIdx + nCount - ( nStartRanges + m_nBufferMask + 1 ) ) & 0x80000000 )
		{ // no, the comparison is negative, therefore it's safe to fill it in
			break;
		}
		// if ( ( nAttempts & nSpins ) == nSpins )
		{
			// the caller prints warning about this stall.
			sys_timer_usleep( 60 ); // TODO: proper spinwait; proper OS syncronization
			if( nAttempts == ( 1000000 / 60 ) )
			{
				// waiting for a second already ... dump everything we know about the patcher state
				Warning(
					"***************************************************************************************************************\n"
					"* SPU hang in FpcPatchState::AddRange(). Please send this log (including a couple of screens above) to Sergiy *\n"
					);
				Msg( "AddRange(%d,%d,%p), ", nStart, nCount, pData );
				Msg( "SharedState @%p {start=0x%X&0x%X,patch=%X,job=%X},", m_pSharedState, m_pSharedState->m_nStartRanges, m_pSharedState->m_nBufferMask, m_pSharedState->m_nThisStatePatchCounter, m_pSharedState->m_eaThisStateJobDescriptor );
				Msg( "FpcpState @%p {end=0x%X},", this, this->m_nEndOfJournalIdx );
				Msg( "SpuGcmShared trace {0x%X,0x%X,0x%X}\n", g_spuGcmShared.m_nFpcPatchCounterOfLastSyncJob, g_spuGcmShared.m_nFpcPatchCounter, g_spuGcmShared.m_nFpcpStartRangesAfterLastSync );
				Msg( "RSX put=%X, get=%X sysring{put=%X,end=%X}\n", g_spuGcmShared.m_eaGcmControlRegister->put, g_spuGcmShared.m_eaGcmControlRegister->get,
					g_spuGcmShared.m_sysring.m_nPut, g_spuGcmShared.m_sysring.m_nEnd );
				Msg( "last JTS ret guard patched @%X, ", *cellGcmGetLabelAddress( GCM_LABEL_DEBUG_FPCP_RING ) );
				// Dump both FPCP ring views (RSX side and SPU side) with the state of each segment's guard word.
				Msg( "ringRsx[%d]:", g_spuGcmShared.m_fpcpRing.m_ringRsx.Count() );
				for( int i = 0; i < g_spuGcmShared.m_fpcpRing.m_ringRsx.Count(); ++i )
				{
					RsxSpuDoubleRing::Segment_t & segment = g_spuGcmShared.m_fpcpRing.m_ringRsx[i];
					Msg(" {%X,%p,%s}", segment.m_eaBase, segment.m_pSpuJts, *(segment.m_pSpuJts) == CELL_SPURS_JOB_COMMAND_LWSYNC ? "LWSYNC" : *(segment.m_pSpuJts) == CELL_SPURS_JOB_COMMAND_JTS ? "JTS" : "ERROR" );
				}
				Msg( "\nringSpu[%d]:", g_spuGcmShared.m_fpcpRing.m_ringSpu.Count() );
				for( int i = 0; i < g_spuGcmShared.m_fpcpRing.m_ringSpu.Count(); ++i )
				{
					RsxSpuDoubleRing::Segment_t & segment = g_spuGcmShared.m_fpcpRing.m_ringSpu[i];
					Msg(" {%X,%p,%s}", segment.m_eaBase, segment.m_pSpuJts, *(segment.m_pSpuJts) == CELL_SPURS_JOB_COMMAND_LWSYNC ? "LWSYNC" : *(segment.m_pSpuJts) == CELL_SPURS_JOB_COMMAND_JTS ? "JTS" : "ERROR" );
				}
				Msg( "***************************************************************************************************************\n" );
			}
		}
	}
	// we have enough free buffer to insert stuff
	job_fpcpatch::ConstRangeHeader_t *hdr = (job_fpcpatch::ConstRangeHeader_t *)AddInternalPtr();
	hdr->m_u32.m_nStart = nStart;
	hdr->m_u32.m_nCount = nCount;
	// add constants block
	AddInternalBlock( pData, nCount );
	return nAttempts;
}
#endif

View File

@@ -0,0 +1,123 @@
//========= Copyright © Valve Corporation, All rights reserved. ====//
//
// Fragment Program Constant Patcher: an SPU implementation, V0
//
#ifndef PS3_SHADER_CONSTANT_PATCH_SPU_HDR
#define PS3_SHADER_CONSTANT_PATCH_SPU_HDR
#ifdef _PS3
#include "vjobs/fpcpatch_shared.h"
#include <cg/cg.h>
#include <cg/cgBinary.h>
#ifdef _DEBUG
//#define DEBUG_FPC_PATCHER
#endif
// PPU-side wrapper around the constant journal ring shared with the fpcpatch SPU job.
// The PPU appends constant ranges (AddRange); the SPU consumes them and advances
// m_pSharedState->m_nStartRanges.
class FpcPatchState
{
public:
	job_fpcpatch::FpcPatchState_t * m_pSharedState; // state block shared with the SPU job
	uint32 m_nEndOfJournalIdx; // this is PPU-side variable only, written by PPU only
	fltx4 * GetBufferStart(){ return m_pSharedState->GetBufferStart() ; } // the buffer start address
	uint32 m_nBufferMask; // ring index mask: number of Qwords in the buffer, minus one (buffer size is a power of 2)
	//#ifdef _DEBUG
	//int m_nRangesAdded;
	//#endif
public:
	FpcPatchState(){m_pSharedState = NULL;}
	// Binds to the shared state block and resets the ring; nBufferQwords must be a power of two.
	void Init( job_fpcpatch::FpcPatchState_t * pSharedState, uint32 nBufferQwords );
	void Reset();
	// Appends a range header + nCount qwords of constants; returns the number of spin-wait attempts.
	uint AddRange( uint32 nStart, uint32 nCount, const float * pData );
	// Rebuilds the full register file from the shared snapshot plus the unconsumed journal.
	void GetSyncState( fltx4 * pRegisters );
protected:
	// Reserves one qword slot at the journal end and advances the end-of-journal index.
	fltx4 * AddInternalPtr()
	{
		fltx4 * pOut = GetBufferStart() + ( m_nEndOfJournalIdx & m_nBufferMask );
		m_nEndOfJournalIdx++;
		return pOut;
	}
	void AddInternal( const fltx4 f4 )
	{
		*AddInternalPtr() = f4;
	}
	// Copies numFltx4s qwords into the ring, splitting the copy at the wrap point.
	inline void AddInternalBlock( const void *pBlock, const uint32 numFltx4s )
	{
		// Fit the first portion until the end of the buffer, second portion at start
		uint32 const nCurrentIdx = ( m_nEndOfJournalIdx & m_nBufferMask ); // the start index to copy to
		uint32 const numFltx4sUntilEnd = ( -nCurrentIdx ) & m_nBufferMask; // number of fltx4's from the nCurrentIdx to the end of the current buffer ring
		uint32 const numFirstCopy = MIN( numFltx4sUntilEnd, numFltx4s ); // number of fltx4's to copy first
		memcpy( GetBufferStart() + nCurrentIdx, pBlock, numFirstCopy * sizeof( fltx4 ) );
		memcpy( GetBufferStart(), ( ( fltx4* ) pBlock ) + numFirstCopy, ( numFltx4s - numFirstCopy ) * sizeof( fltx4 ) );
		m_nEndOfJournalIdx += numFltx4s;
	}
};
struct IDirect3DPixelShader9 ;

// Patches fragment-program (pixel shader) constants into RSX ucode, either via the SPU
// fpcpatch job or synchronously on the PPU (debug). Public semantics mirror the cgGL* calls
// noted on the individual methods.
class CFragmentProgramConstantPatcher_SPU
{
public:
	CFragmentProgramConstantPatcher_SPU();
	~CFragmentProgramConstantPatcher_SPU();
	void InitLocal( void *pBuffer, uint nSize );
	void Shutdown();
	// semantics should match cgGLSetFragmentRegisterBlock()
	void SetFragmentRegisterBlock( uint StartRegister, uint Vector4fCount, const float* pConstantData );
	// semantics of cgGLBindProgram( pPixelShader->m_pixProgram->m_CGprogram )
	void BindProgram( const CgBinaryProgram *prog );
	void BindProgram( const struct IDirect3DPixelShader9 * prog );
	void BeginScene();
	void EndScene();
	//job_fpcpatch::FpcPatchState_t * GetSharedState(){return m_state.m_pSharedState; }
	// Journal bookkeeping: markers are journal indices; distances are measured in qwords.
	uint GetStateEndOfJournalIdx() { return m_state.m_nEndOfJournalIdx; }
	uint GetJournalCapacity() const { return m_state.m_nBufferMask + 1; }
	int GetJournalSpaceUsedSince( uint nMarker )const{ return int( m_state.m_nEndOfJournalIdx - nMarker ); }
	int GetJournalSpaceLeftSince( uint nMarker )const{ return int( ( m_state.m_nBufferMask + 1 ) - ( m_state.m_nEndOfJournalIdx - nMarker ) ); }
protected:
	void ResetPut();
	void * FpcPatch( const struct CgBinaryProgram * prog, void * pFragmentProgramDestination, uint32 * pJts );
	void FpcPatch2( const job_fpcpatch2::FpHeader_t * psh, uint nFpDmaSize, void *pPatchedProgram, uint32 * pJts );
protected:
	friend class CSpuGcm;
	FpcPatchState m_state; // constant journal shared with the SPU job
	uint32* m_pBuffer, *m_pBufferEnd; // patched-program output buffer range
	int m_nIoOffsetDelta; // m_pBuffer + m_nIoOffsetDelta == IO offset usable by RSX
	uint32 * m_pPutFragmentProgram;
	uint m_nFpcPatchCounterAtBeginScene; // used for timing
	uint m_nFpcPatchCounterOfLastSyncJob;
	uint m_nBufferLocation;// CELL_GCM_LOCATION_MAIN
	uint m_nFpcPatchCounter, m_nFpcPatchSyncMask;
	//uint m_nStartRangesAfterLastSync; // this is the index used to upload only the useful constants to SPU
	bool m_isBufferPassedIn; // presumably true when InitLocal received an external buffer (not owned) — TODO confirm
	bool m_bFpcPatchOnPpu, m_bEnableSPU;
#ifdef DEBUG_FPC_PATCHER
	void ValidatePatchedProgram( const CgBinaryProgram *prog, void * pPatchedUcode );
	fltx4 *m_pSyncState; // PPU-side shadow of all constants, used to validate SPU patching
	bool m_bTestAlwaysStateSync;
	bool m_bSync; // don't use JTS, but just patch synchronously (may be more stable with GPAD)
#endif
};
extern CFragmentProgramConstantPatcher_SPU g_pixelShaderPatcher; // Patches pixel shader constants
#endif
#endif

View File

@@ -0,0 +1,202 @@
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
// Configure gcm to be inline, unsafe etc....
// Include (spu or ppu) after cell headers
//
//==================================================================================================
#ifndef INCLUDED_GCMCONFIG_H
#define INCLUDED_GCMCONFIG_H
// Comment in one GCMCONFIG defn as required
//#define GCMCONFIG(x) x##Inline
#define GCMCONFIG(x) x##UnsafeInline
//#define GCMCONFIG(x) x
#define GCM_FUNC( GCM_FUNCTION, ...) GCM_FUNCTION ( gpGcmContext, ##__VA_ARGS__ )
#define cellGcmSetReferenceCommand GCMCONFIG(cellGcmSetReferenceCommand)
#define cellGcmSetJumpCommand GCMCONFIG(cellGcmSetJumpCommand)
#define cellGcmSetCallCommand GCMCONFIG(cellGcmSetCallCommand)
#define cellGcmSetReturnCommand GCMCONFIG(cellGcmSetReturnCommand)
#define cellGcmSetAntiAliasingControl GCMCONFIG(cellGcmSetAntiAliasingControl)
#define cellGcmSetWaitLabel GCMCONFIG(cellGcmSetWaitLabel)
#define cellGcmSetWriteCommandLabel GCMCONFIG(cellGcmSetWriteCommandLabel)
#define cellGcmSetWriteBackEndLabel GCMCONFIG(cellGcmSetWriteBackEndLabel)
#define cellGcmSetWriteTextureLabel GCMCONFIG(cellGcmSetWriteTextureLabel)
#define cellGcmSetTimeStamp GCMCONFIG(cellGcmSetTimeStamp)
#define cellGcmSetInvalidateZcull GCMCONFIG(cellGcmSetInvalidateZcull)
#define cellGcmSetAlphaFunc GCMCONFIG(cellGcmSetAlphaFunc)
#define cellGcmSetBlendColor GCMCONFIG(cellGcmSetBlendColor)
#define cellGcmSetBlendEquation GCMCONFIG(cellGcmSetBlendEquation)
#define cellGcmSetBlendFunc GCMCONFIG(cellGcmSetBlendFunc)
#define cellGcmSetClearSurface GCMCONFIG(cellGcmSetClearSurface)
#define cellGcmSetClearColor GCMCONFIG(cellGcmSetClearColor)
#define cellGcmSetClearDepthStencil GCMCONFIG(cellGcmSetClearDepthStencil)
#define cellGcmSetColorMask GCMCONFIG(cellGcmSetColorMask)
#define cellGcmSetColorMaskMrt GCMCONFIG(cellGcmSetColorMaskMrt)
#define cellGcmSetCullFace GCMCONFIG(cellGcmSetCullFace)
#define cellGcmSetDepthBounds GCMCONFIG(cellGcmSetDepthBounds)
#define cellGcmSetDepthFunc GCMCONFIG(cellGcmSetDepthFunc)
#define cellGcmSetDepthMask GCMCONFIG(cellGcmSetDepthMask)
#define cellGcmSetFrontFace GCMCONFIG(cellGcmSetFrontFace)
#define cellGcmSetLineWidth GCMCONFIG(cellGcmSetLineWidth)
#define cellGcmSetLineSmoothEnable GCMCONFIG(cellGcmSetLineSmoothEnable)
#define cellGcmSetLineStippleEnable GCMCONFIG(cellGcmSetLineStippleEnable)
#define cellGcmSetLineStipplePattern GCMCONFIG(cellGcmSetLineStipplePattern)
#define cellGcmSetLogicOp GCMCONFIG(cellGcmSetLogicOp)
#define cellGcmSetPointSize GCMCONFIG(cellGcmSetPointSize)
#define cellGcmSetPolygonOffset GCMCONFIG(cellGcmSetPolygonOffset)
#define cellGcmSetPolySmoothEnable GCMCONFIG(cellGcmSetPolySmoothEnable)
#define cellGcmSetPolygonStippleEnable GCMCONFIG(cellGcmSetPolygonStippleEnable)
#define cellGcmSetPolygonStipplePattern GCMCONFIG(cellGcmSetPolygonStipplePattern)
#define cellGcmSetFrontPolygonMode GCMCONFIG(cellGcmSetFrontPolygonMode)
#define cellGcmSetBackPolygonMode GCMCONFIG(cellGcmSetBackPolygonMode)
#define cellGcmSetScissor GCMCONFIG(cellGcmSetScissor)
#define cellGcmSetShadeMode GCMCONFIG(cellGcmSetShadeMode)
#define cellGcmSetTwoSideLightEnable GCMCONFIG(cellGcmSetTwoSideLightEnable)
#define cellGcmSetStencilFunc GCMCONFIG(cellGcmSetStencilFunc)
#define cellGcmSetBackStencilFunc GCMCONFIG(cellGcmSetBackStencilFunc)
#define cellGcmSetStencilMask GCMCONFIG(cellGcmSetStencilMask)
#define cellGcmSetBackStencilMask GCMCONFIG(cellGcmSetBackStencilMask)
#define cellGcmSetStencilOp GCMCONFIG(cellGcmSetStencilOp)
#define cellGcmSetBackStencilOp GCMCONFIG(cellGcmSetBackStencilOp)
#define cellGcmSetZMinMaxControl GCMCONFIG(cellGcmSetZMinMaxControl)
#define cellGcmSetAlphaTestEnable GCMCONFIG(cellGcmSetAlphaTestEnable)
#define cellGcmSetBlendEnable GCMCONFIG(cellGcmSetBlendEnable)
#define cellGcmSetBlendEnableMrt GCMCONFIG(cellGcmSetBlendEnableMrt)
#define cellGcmSetLogicOpEnable GCMCONFIG(cellGcmSetLogicOpEnable)
#define cellGcmSetCullFaceEnable GCMCONFIG(cellGcmSetCullFaceEnable)
#define cellGcmSetDepthBoundsTestEnable GCMCONFIG(cellGcmSetDepthBoundsTestEnable)
#define cellGcmSetDepthTestEnable GCMCONFIG(cellGcmSetDepthTestEnable)
#define cellGcmSetDitherEnable GCMCONFIG(cellGcmSetDitherEnable)
#define cellGcmSetStencilTestEnable GCMCONFIG(cellGcmSetStencilTestEnable)
#define cellGcmSetTwoSidedStencilTestEnable GCMCONFIG(cellGcmSetTwoSidedStencilTestEnable)
#define cellGcmSetPolygonOffsetFillEnable GCMCONFIG(cellGcmSetPolygonOffsetFillEnable)
#define cellGcmSetRestartIndexEnable GCMCONFIG(cellGcmSetRestartIndexEnable)
#define cellGcmSetPointSpriteControl GCMCONFIG(cellGcmSetPointSpriteControl)
#define cellGcmSetInvalidateTextureCache GCMCONFIG(cellGcmSetInvalidateTextureCache)
#define cellGcmSetTextureBorderColor GCMCONFIG(cellGcmSetTextureBorderColor)
#define cellGcmSetTextureControl GCMCONFIG(cellGcmSetTextureControl)
#define cellGcmSetTextureOptimization GCMCONFIG(cellGcmSetTextureOptimization)
#define cellGcmSetCylindricalWrap GCMCONFIG(cellGcmSetCylindricalWrap)
#define cellGcmSetInvalidateVertexCache GCMCONFIG(cellGcmSetInvalidateVertexCache)
#define cellGcmSetRestartIndex GCMCONFIG(cellGcmSetRestartIndex)
#define cellGcmSetVertexData4f GCMCONFIG(cellGcmSetVertexData4f)
#define cellGcmSetFrequencyDividerOperation GCMCONFIG(cellGcmSetFrequencyDividerOperation)
#define cellGcmSetTransformBranchBits GCMCONFIG(cellGcmSetTransformBranchBits)
#define cellGcmSetVertexAttribInputMask GCMCONFIG(cellGcmSetVertexAttribInputMask)
#define cellGcmSetFragmentProgramGammaEnable GCMCONFIG(cellGcmSetFragmentProgramGammaEnable)
#define cellGcmSetRenderEnable GCMCONFIG(cellGcmSetRenderEnable)
#define cellGcmSetZpassPixelCountEnable GCMCONFIG(cellGcmSetZpassPixelCountEnable)
#define cellGcmSetClearReport GCMCONFIG(cellGcmSetClearReport)
#define cellGcmSetReport GCMCONFIG(cellGcmSetReport)
#define cellGcmSetZcullStatsEnable GCMCONFIG(cellGcmSetZcullStatsEnable)
#define cellGcmSetZcullControl GCMCONFIG(cellGcmSetZcullControl)
#define cellGcmSetZcullLimit GCMCONFIG(cellGcmSetZcullLimit)
#define cellGcmSetScullControl GCMCONFIG(cellGcmSetScullControl)
#define cellGcmSetVertexTextureAddress GCMCONFIG(cellGcmSetVertexTextureAddress)
#define cellGcmSetVertexTextureFilter GCMCONFIG(cellGcmSetVertexTextureFilter)
#define cellGcmSetVertexTextureControl GCMCONFIG(cellGcmSetVertexTextureControl)
#define cellGcmSetVertexTextureBorderColor GCMCONFIG(cellGcmSetVertexTextureBorderColor)
#define cellGcmSetPerfMonTrigger GCMCONFIG(cellGcmSetPerfMonTrigger)
#define cellGcmSetFogMode GCMCONFIG(cellGcmSetFogMode)
#define cellGcmSetFogParams GCMCONFIG(cellGcmSetFogParams)
#define cellGcmSetTransferLocation GCMCONFIG(cellGcmSetTransferLocation)
#define cellGcmSetDepthFormat GCMCONFIG(cellGcmSetDepthFormat)
#define cellGcmSetBlendOptimization GCMCONFIG(cellGcmSetBlendOptimization)
#define cellGcmSetPolygonOffsetLineEnable GCMCONFIG(cellGcmSetPolygonOffsetLineEnable)
#define cellGcmSetVertexAttribOutputMask GCMCONFIG(cellGcmSetVertexAttribOutputMask)
#define cellGcmSetTextureRemap GCMCONFIG(cellGcmSetTextureRemap)
#define cellGcmSetVertexProgramStartSlot GCMCONFIG(cellGcmSetVertexProgramStartSlot)
#define cellGcmSetVertexProgramRegisterCount GCMCONFIG(cellGcmSetVertexProgramRegisterCount)
#define cellGcmSetTransferDataMode GCMCONFIG(cellGcmSetTransferDataMode)
#define cellGcmSetDrawBegin GCMCONFIG(cellGcmSetDrawBegin)
#define cellGcmSetDrawEnd GCMCONFIG(cellGcmSetDrawEnd)
#define cellGcmSetVertexDataArrayFormat GCMCONFIG(cellGcmSetVertexDataArrayFormat)
#define cellGcmSetVertexDataArrayOffset GCMCONFIG(cellGcmSetVertexDataArrayOffset)
#define cellGcmSetUpdateFragmentProgramParameterLocation GCMCONFIG(cellGcmSetUpdateFragmentProgramParameterLocation)
#define cellGcmSetVertexDataBase GCMCONFIG(cellGcmSetVertexDataBase)
#define cellGcmSetFragmentProgramOffset GCMCONFIG(cellGcmSetFragmentProgramOffset)
#define cellGcmSetFragmentProgramControl GCMCONFIG(cellGcmSetFragmentProgramControl)
#define cellGcmSetClearZcullSurface GCMCONFIG(cellGcmSetClearZcullSurface)
#define cellGcmSetZcullEnable GCMCONFIG(cellGcmSetZcullEnable)
#define cellGcmSetUserCommand GCMCONFIG(cellGcmSetUserCommand)
#define cellGcmSetReportLocation GCMCONFIG(cellGcmSetReportLocation)
#define cellGcmSetNotifyIndex GCMCONFIG(cellGcmSetNotifyIndex)
#define cellGcmSetNotify GCMCONFIG(cellGcmSetNotify)
#define cellGcmSetTextureFilter GCMCONFIG(cellGcmSetTextureFilter)
#define cellGcmSetTextureAddress GCMCONFIG(cellGcmSetTextureAddress)
#define cellGcmSetUserClipPlaneControl GCMCONFIG(cellGcmSetUserClipPlaneControl)
#define cellGcmSetAnisoSpread GCMCONFIG(cellGcmSetAnisoSpread)
#define cellGcmSetNopCommand GCMCONFIG(cellGcmSetNopCommand)
#define cellGcmSetSkipNop GCMCONFIG(cellGcmSetSkipNop)
#define cellGcmReserveMethodSize GCMCONFIG(cellGcmReserveMethodSize)
#define cellGcmSetWriteBackEndLabelForConditional GCMCONFIG(cellGcmSetWriteBackEndLabelForConditional)
#define cellGcmSetWriteTextureLabelForConditional GCMCONFIG(cellGcmSetWriteTextureLabelForConditional)
#define cellGcmSetVertexProgram GCMCONFIG(cellGcmSetVertexProgram)
#define cellGcmSetFragmentProgramLoadLocation GCMCONFIG(cellGcmSetFragmentProgramLoadLocation)
#define cellGcmSetVertexProgramLoad GCMCONFIG(cellGcmSetVertexProgramLoad)
#define cellGcmSetVertexProgramLoadSlot GCMCONFIG(cellGcmSetVertexProgramLoadSlot)
#define cellGcmSetVertexProgramConstants GCMCONFIG(cellGcmSetVertexProgramConstants)
#define cellGcmSetVertexProgramParameterBlock GCMCONFIG(cellGcmSetVertexProgramParameterBlock)
#define cellGcmSetVertexDataArray GCMCONFIG(cellGcmSetVertexDataArray)
#define cellGcmSetTextureBorder GCMCONFIG(cellGcmSetTextureBorder)
#define cellGcmSetWaitFlip GCMCONFIG(cellGcmSetWaitFlip)
#define cellGcmSetFragmentProgramParameterPointer GCMCONFIG(cellGcmSetFragmentProgramParameterPointer)
#define cellGcmSetFragmentProgramParameter GCMCONFIG(cellGcmSetFragmentProgramParameter)
#define cellGcmSetFragmentProgram GCMCONFIG(cellGcmSetFragmentProgram)
#define cellGcmSetVertexProgramParameter GCMCONFIG(cellGcmSetVertexProgramParameter)
#define cellGcmSetFragmentProgramLoad GCMCONFIG(cellGcmSetFragmentProgramLoad)
#define cellGcmSetUpdateFragmentProgramParameter GCMCONFIG(cellGcmSetUpdateFragmentProgramParameter)
#define cellGcmSetTextureFilterSigned GCMCONFIG(cellGcmSetTextureFilterSigned)
#define cellGcmSetClipMinMax GCMCONFIG(cellGcmSetClipMinMax)
#define cellGcmSetViewport GCMCONFIG(cellGcmSetViewport)
#define cellGcmSetTextureAddressAnisoBiasRemap GCMCONFIG(cellGcmSetTextureAddressAnisoBiasRemap)
#define cellGcmSetTextureAddressAnisoBias GCMCONFIG(cellGcmSetTextureAddressAnisoBias)
#define cellGcmSetTexture GCMCONFIG(cellGcmSetTexture)
#define cellGcmSetVertexTexture GCMCONFIG(cellGcmSetVertexTexture)
#define cellGcmSetSurface GCMCONFIG(cellGcmSetSurface)
#define cellGcmSetSurfaceWindow GCMCONFIG(cellGcmSetSurfaceWindow)
#define cellGcmSetInlineTransfer GCMCONFIG(cellGcmSetInlineTransfer)
#define cellGcmInlineTransfer GCMCONFIG(cellGcmInlineTransfer)
#define cellGcmSetTransferImage GCMCONFIG(cellGcmSetTransferImage)
#define cellGcmTransferData GCMCONFIG(cellGcmTransferData)
#define cellGcmSetTransferData GCMCONFIG(cellGcmSetTransferData)
#define cellGcmSetConvertSwizzleFormat GCMCONFIG(cellGcmSetConvertSwizzleFormat)
#define cellGcmSetInlineTransferPointer GCMCONFIG(cellGcmSetInlineTransferPointer)
#define cellGcmSetTransferDataFormat GCMCONFIG(cellGcmSetTransferDataFormat)
#define cellGcmSetTransferDataOffset GCMCONFIG(cellGcmSetTransferDataOffset)
#define cellGcmSetTransferScaleMode GCMCONFIG(cellGcmSetTransferScaleMode)
#define cellGcmSetTransferScaleSurface GCMCONFIG(cellGcmSetTransferScaleSurface)
#define cellGcmSetTransferScaleSwizzle GCMCONFIG(cellGcmSetTransferScaleSwizzle)
#define cellGcmSetTransferReportData GCMCONFIG(cellGcmSetTransferReportData)
#define cellGcmSetDrawArrays GCMCONFIG(cellGcmSetDrawArrays)
#define cellGcmSetDrawIndexArray GCMCONFIG(cellGcmSetDrawIndexArray)
#define cellGcmSetDrawInlineArray GCMCONFIG(cellGcmSetDrawInlineArray)
#define cellGcmSetDrawInlineIndexArray32 GCMCONFIG(cellGcmSetDrawInlineIndexArray32)
#define cellGcmSetDrawInlineIndexArray16 GCMCONFIG(cellGcmSetDrawInlineIndexArray16)
#define cellGcmSetDrawInlineArrayPointer GCMCONFIG(cellGcmSetDrawInlineArrayPointer)
#define cellGcmSetVertexProgramConstantsPointer GCMCONFIG(cellGcmSetVertexProgramConstantsPointer)
#define cellGcmSetDrawInlineIndexArray32Pointer GCMCONFIG(cellGcmSetDrawInlineIndexArray32Pointer)
#define cellGcmSetDrawInlineIndexArray16Pointer GCMCONFIG(cellGcmSetDrawInlineIndexArray16Pointer)
#define cellGcmSetVertexProgramParameterBlockPointer GCMCONFIG(cellGcmSetVertexProgramParameterBlockPointer)
#define cellGcmSetWaitForIdle GCMCONFIG(cellGcmSetWaitForIdle)
#define cellGcmSetVertexData3f GCMCONFIG(cellGcmSetVertexData3f)
#define cellGcmSetVertexData2f GCMCONFIG(cellGcmSetVertexData2f)
#define cellGcmSetVertexData1f GCMCONFIG(cellGcmSetVertexData1f)
#define cellGcmSetVertexData4s GCMCONFIG(cellGcmSetVertexData4s)
#define cellGcmSetVertexDataScaled4s GCMCONFIG(cellGcmSetVertexDataScaled4s)
#define cellGcmSetVertexData2s GCMCONFIG(cellGcmSetVertexData2s)
#define cellGcmSetVertexData4ub GCMCONFIG(cellGcmSetVertexData4ub)
#define cellGcmSetTextureControlAlphaKill GCMCONFIG(cellGcmSetTextureControlAlphaKill)
#define cellGcmSetNoParanoidTextureFetches GCMCONFIG(cellGcmSetNoParanoidTextureFetches)
#define cellGcmSetInlineTransferAlignedPointer GCMCONFIG(cellGcmSetInlineTransferAlignedPointer)
#define cellGcmSetVertexProgramConstantsAlignedPointer GCMCONFIG(cellGcmSetVertexProgramConstantsAlignedPointer)
#define cellGcmSetVertexProgramParameterBlockAlignedPointer GCMCONFIG(cellGcmSetVertexProgramParameterBlockAlignedPointer)
#define cellGcmSetDrawInlineArrayAlignedPointer GCMCONFIG(cellGcmSetDrawInlineArrayAlignedPointer)
#define cellGcmSetDrawInlineIndexArray32AlignedPointer GCMCONFIG(cellGcmSetDrawInlineIndexArray32AlignedPointer)
#define cellGcmSetDrawInlineIndexArray16AlignedPointer GCMCONFIG(cellGcmSetDrawInlineIndexArray16AlignedPointer)
#endif // INCLUDED_GCMCONFIG_H

View File

@@ -0,0 +1,273 @@
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
//
//
//==================================================================================================
//--------------------------------------------------------------------------------------------------
// Includes
//--------------------------------------------------------------------------------------------------
#include <libsn_spu.h>
#include "SpuMgr_spu.h"
#include "gcmdraw_spu.h"
#include "gcmdrawstate.h"
#include "gcmstate.h"
//--------------------------------------------------------------------------------------------------
// Globals
//--------------------------------------------------------------------------------------------------
// All of these buffers are DMA sources/targets and therefore need 16-byte alignment on both
// toolchains — hence the paired ALIGN16 (prefix form) / ALIGN16_POST (suffix form) annotations.
ALIGN16 VertexShader9Data_t gVertexShaderData ALIGN16_POST;
ALIGN16 PixelShader9Data_t gPixelShaderData ALIGN16_POST;
ALIGN16 CellGcmContextData gGcmContext ALIGN16_POST;	// fixed: ALIGN16_POST was duplicated
ALIGN16 CPs3gcmGlobalState g_ps3gcmGlobalState ALIGN16_POST ;
ALIGN16 CPs3gcmTextureLayout::Format_t g_ps3texFormats[PS3_TEX_MAX_FORMAT_COUNT] ALIGN16_POST;
ALIGN16 IDirect3DVertexDeclaration9 gDecl ALIGN16_POST;
// Fixed: a suffix alignment attribute must precede the initializer, not follow it.
ALIGN16 CellGcmContextData* gpGcmContext ALIGN16_POST = &gGcmContext;
ALIGN16 uint8 gFp[0x2000] ALIGN16_POST;	// pixel shader header/ucode/patch staging area
ALIGN16 uint8 gVp[0x2000] ALIGN16_POST;	// vertex shader command buffer staging area
ALIGN16 CPs3gcmTextureLayout gaLayout[D3D_MAX_TEXTURES] ALIGN16_POST;
ALIGN16 uint8 gaECB[3][0x1000] ALIGN16_POST;	// fixed: ALIGN16_POST was missing although ECBs are DMA targets
ALIGN16 CPs3gcmLocalMemoryBlock gLmBlock ALIGN16_POST;
int gEA;	// EA of the last texture layout requested (debug aid, see GetTextureLayouts)
//--------------------------------------------------------------------------------------------------
// Routine to DMA in texture Layouts
//--------------------------------------------------------------------------------------------------
void GetTextureLayouts()
{
// Loop and DMA in texture layouts
for (uint32 lp = 0; lp < ARRAYSIZE(gaLayout); lp++)
{
uintp ea = gpGcmDrawState->m_textures[lp].m_eaLayout;
gEA = ea;
if (ea) gSpuMgr.DmaGetSAFE( &gaLayout[lp], ea, sizeof(CPs3gcmTextureLayout), SPU_DMAGET_TAG );
}
gSpuMgr.DmaDone( SPU_DMAGET_TAG_WAIT );
}
//--------------------------------------------------------------------------------------------------
// main()
//--------------------------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------------------
// Protocol
//
// Simplest possible for starters :
// PPU sends SPU Mbx the last part of the drawcall to perform.
// SPU performs it and DMAs down the data. When it's complete it send the PPUMbx the length of the drawcall
// PPU prepares next packet which waits on PPUMbx completion before sending another.
//
// Relies on PPU calling cellGcmReserveMethodSize with 16k, so that the SPU can go ahead and DMA back the
// draw..
//--------------------------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------------------
// SPU entry point. Protocol (see block comment above): the PPU posts batches of drawstates via
// the SPU mailbox; for each batch we DMA in the drawstates plus their dependent data, build the
// GCM command stream locally, DMA the generated FIFO segment back out, and report the end address
// of the written segment through the outbound mailbox.
//--------------------------------------------------------------------------------------------------
int main(void)
{
	gSpuMgr.Init();

	// Initialise SPUs drawstate class
	uint32 eaGcmDrawState;
	gpGcmDrawState->Init();
	uint8* pData = gpGcmDrawState->m_pData;

	// Initialise context: a local FIFO buffer big enough for one whole batch of drawcalls
	gGcmContext.begin = (uint32*)MemAlloc_AllocAligned(GCM_DS_FIFOPERDRAW * GCM_NUMDRAWCALLS_SPU, 128);
	gGcmContext.end = gGcmContext.begin + (GCM_DS_FIFOPERDRAW * GCM_NUMDRAWCALLS_SPU)/4;
	gGcmContext.callback = NULL;

	// Pull in globalstate (the PPU sends its effective address as the first mailbox message)
	volatile uint32 eagGlobalState;
	gSpuMgr.ReadMailbox( (uint32_t *) &eagGlobalState );
	gSpuMgr.DmaGetUNSAFE( &g_ps3gcmGlobalState, eagGlobalState, SPUMGR_ALIGN_UP( sizeof(g_ps3gcmGlobalState), 16 ), SPU_DMAGET_TAG );
	gSpuMgr.DmaDone( SPU_DMAGET_TAG_WAIT );

	while(1)
	{
		// Mailbox word packs the batch: high 16 bits = drawstate count, low 16 bits = start index
		uint32 startidx, count, loop;
		gSpuMgr.ReadMailbox( (uint32_t *) &startidx );
		count = startidx >>16;
		startidx &= 0xFFFF;
		gpGcmContext->current = gpGcmContext->begin;

		// Loop over the drawstates
		for (loop = 0; loop < count; loop++)
		{
			uint32 idx = (startidx +loop) % GCM_DRAWSTATE_MAX;
			eaGcmDrawState = g_ps3gcmGlobalState.m_eaDrawStates + (idx*sizeof(CGcmDrawState));

			// Read drawstate
			gSpuMgr.DmaGetUNSAFE( gpGcmDrawState, eaGcmDrawState, SPUMGR_ALIGN_UP( DRAWSTATE_SIZEOFDMA, 16 ), SPU_DMAGET_TAG );
			gSpuMgr.DmaDone( SPU_DMAGET_TAG_WAIT );

			// Read Fixed Data, then repoint the drawstate at the local copy
			gSpuMgr.DmaGetUNSAFE( &gFixedData[0], uintp(gpGcmDrawState->m_pFixed), SPUMGR_ALIGN_UP(sizeof(gFixedData[0]), 16), SPU_DMAGET_TAG );
			gSpuMgr.DmaDone( SPU_DMAGET_TAG_WAIT );
			gpGcmDrawState->m_pFixed = &gFixedData[0];

			// Read Packed Data
			uint32* pParam = gpGcmDrawState->m_param;
			if (gpGcmDrawState->m_cmd & 0x80000000) snPause();	// top bit of m_cmd set: stop in the debugger
			gpGcmDrawState->m_cmd &= 0x7fffffff;
			uint32 packSize = gpGcmDrawState->m_pDataCursor - gpGcmDrawState->m_pData;
			gSpuMgr.DmaGetUNSAFE( pData, uintp(gpGcmDrawState->m_pData), SPUMGR_ALIGN_UP( packSize, 16 ), SPU_DMAGET_TAG );
			gpGcmDrawState->m_pData = pData;
			gpGcmDrawState->m_pDataCursor = pData + packSize;

			// DMA in any ECBs we will need...
			for ( uint32 lp = 0; lp < 3; lp++ )
			{
				if (gpGcmDrawState->m_aECB[lp])
				{
					gSpuMgr.DmaGetSAFE( gaECB[lp], uintp(gpGcmDrawState->m_aECB[lp]), gpGcmDrawState->m_aSizeECB[lp], SPU_DMAGET_TAG );
					gpGcmDrawState->m_aECB[lp] = gaECB[lp];
				}
			}
			gSpuMgr.DmaDone( SPU_DMAGET_TAG_WAIT );

			// Read Pixel Shader and Vertex Shader (skipped for pure state commits / end of frame)
			if ( (gpGcmDrawState->m_cmd != CmdCommitStates) && (gpGcmDrawState->m_cmd != CmdEndFrame ))
			{
				if(gpGcmDrawState->m_pVertexShaderData)
				{
					gSpuMgr.DmaGetUNSAFE( &gVertexShaderData, uintp(gpGcmDrawState->m_pVertexShaderData), SPUMGR_ALIGN_UP( sizeof(gVertexShaderData), 16 ), SPU_DMAGET_TAG );
					gpGcmDrawState->m_pVertexShaderData = &gVertexShaderData;
					gSpuMgr.DmaDone( SPU_DMAGET_TAG_WAIT );
					// FPHeader, UCode, patches etc...
					uintp ea = uintp(gVertexShaderData.m_pVertexShaderCmdBuffer);
					gSpuMgr.DmaGetUNSAFE( &gVp, ea, SPUMGR_ALIGN_UP((gVertexShaderData.m_nVertexShaderCmdBufferWords*4),16), SPU_DMAGET_TAG );
					gVertexShaderData.m_pVertexShaderCmdBuffer = (uint32*)gVp;
				}
				if(gpGcmDrawState->m_pPixelShaderData)
				{
					// PS Data
					gSpuMgr.DmaGetUNSAFE( &gPixelShaderData, uintp(gpGcmDrawState->m_pPixelShaderData), SPUMGR_ALIGN_UP( sizeof(gPixelShaderData), 16 ), SPU_DMAGET_TAG );
					gpGcmDrawState->m_pPixelShaderData = &gPixelShaderData;
					gSpuMgr.DmaDone( SPU_DMAGET_TAG_WAIT );
					// FPHeader, UCode, patches etc...
					uintp ea = uintp(gPixelShaderData.m_eaFp);
					gSpuMgr.DmaGetUNSAFE( &gFp, ea, SPUMGR_ALIGN_UP(gPixelShaderData.m_nTotalSize,16), SPU_DMAGET_TAG );
					gPixelShaderData.m_eaFp = (FpHeader_t*)gFp;
				}
				// Decl
				gSpuMgr.DmaGetUNSAFE( &gDecl, uintp(pParam[0]), SPUMGR_ALIGN_UP( sizeof(gDecl), 16 ), SPU_DMAGET_TAG );
				// Texture Formats
				gSpuMgr.DmaGetUNSAFE( &g_ps3texFormats, uintp(pParam[4]), SPUMGR_ALIGN_UP( sizeof(g_ps3texFormats), 16 ), SPU_DMAGET_TAG );
				gSpuMgr.DmaDone( SPU_DMAGET_TAG_WAIT );
			}

			// Process command
			switch(gpGcmDrawState->m_cmd)
			{
				case CmdCommitStates:
				case CmdEndFrame:
					gpGcmDrawState->CommitStates();
					break;

				case CmdDrawPrim:
					gpGcmDrawState->CommitAll(&gDecl, pParam[1]);
					// Draw
					GCM_FUNC( cellGcmSetDrawIndexArray,
						pParam[2], pParam[5],
						CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16, CELL_GCM_LOCATION_LOCAL,
						pParam[3] );
					break;

				case CmdDrawPrimUP:
				{
					D3DStreamDesc &dsd = g_dxGcmVertexStreamSources[0];
					dsd.m_offset = 0;
					dsd.m_stride = pParam[2];
					dsd.m_vtxBuffer = ( IDirect3DVertexBuffer9 * )( uintp )1; // invalid pointer, but non-NULL to signal it's a real vertex buffer;
					dsd.m_nLocalBufferOffset = 0;
					gpGcmDrawState->CommitAll(&gDecl, 0);
					GCM_FUNC(cellGcmSetCallCommand, pParam[1]);
				}
				break;
			}
		} // End Loop over drawstates

		// DMA out packet
		// first fill context to a 16B boundary
		while (uintp(gpGcmContext->current) & 0xf)
		{
			*gpGcmContext->current = 0;
			gpGcmContext->current++;
		}

		// Send to fifo
		uint32 bytesUsed = (uint8*)gpGcmContext->current - (uint8*)gpGcmContext->begin;
		gSpuMgr.DmaSync();
		gSpuMgr.DmaPut(gpGcmDrawState->m_eaOutputFIFO, (void*)gpGcmContext->begin,
			bytesUsed, SPU_DMAPUT_TAG);
		gSpuMgr.DmaDone(SPU_DMAPUT_TAG_WAIT);

		// Send to SPU mailbox: EA of the end of the written FIFO segment (the PPU waits on this)
		gSpuMgr.WriteMailbox(gpGcmDrawState->m_eaOutputFIFO + bytesUsed);
	}
}

View File

@@ -0,0 +1,33 @@
#ifndef INCLUDED_GCMDRAW_SPU_H
#define INCLUDED_GCMDRAW_SPU_H
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
// Definitions shared between the PPU side and the SPU draw job: DMA tag assignments.
//
//==================================================================================================
//--------------------------------------------------------------------------------------------------
// Headers
//--------------------------------------------------------------------------------------------------
#ifdef SPU
#include "SpuMgr_spu.h"
#else
#include "tier0/platform.h"
#include "tier0/dbg.h"
#include "cell/gcm.h"	// fixed: was "cell\gcm.h" - a backslash in an include path is non-portable
#include "SpuMgr_ppu.h"
#endif
//--------------------------------------------------------------------------------------------------
// Defines for the DMA tags
//--------------------------------------------------------------------------------------------------
// Tag 0 is used for all DMA "get" transfers (main memory -> SPU local store),
// tag 1 for all "put" transfers (SPU -> main memory). The *_WAIT values are
// the corresponding tag-group masks passed to DmaDone() to wait for completion.
#define SPU_DMAGET_TAG 0
#define SPU_DMAGET_TAG_WAIT ( 1 << SPU_DMAGET_TAG )
#define SPU_DMAPUT_TAG 1
#define SPU_DMAPUT_TAG_WAIT ( 1 << SPU_DMAPUT_TAG )
#endif // INCLUDED_GCMDRAW_SPU_H

View File

@@ -0,0 +1,708 @@
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
// Per draw call gcm state
//
//==================================================================================================
#define PPU_DRAW 0
#ifndef SPU
#define CELL_GCM_MEMCPY memcpy // PPU SNC has no such intrinsic
#endif
#ifndef SPU
#include "sys/memory.h"
#include "sysutil/sysutil_sysparam.h"
#include "cell/sysmodule.h"
#include "tier0/platform.h"
#include "tier0/dbg.h"
#include "tier1/utlbuffer.h"
#include "cell/gcm.h"
#include "gcmconfig.h"
#include "ps3gcmmemory.h"
#include "gcmstate.h"
#include "gcmlabels.h"
#include "gcmdrawstate.h"
#include "ps3/ps3_helpers.h"
#include <materialsystem/imaterialsystem.h>
#include <vprof.h>
#include "tier0/memdbgon.h"
#else
#include "spumgr_spu.h"
#include "gcmdrawstate.h"
#endif
//--------------------------------------------------------------------------------------------------
// Globals
//--------------------------------------------------------------------------------------------------
// Ring of draw-state packets: the PPU fills slots while the SPU drains batches of them.
ALIGN128 CGcmDrawState gGcmDrawState[GCM_DRAWSTATE_MAX] ALIGN128_POST;
// Slot currently being filled; SendToSpu() advances this around the ring.
CGcmDrawState* gpGcmDrawState = &gGcmDrawState[0];
// Zcull tuning values.
int g_bZcullAuto = 1;
int g_nZcullDefault = 100;
int g_nZcullMoveForward = 100;
int g_nZcullPushBack = 100;
// Per-D3D-stream cache of SetVertexDataArray parameters (avoids redundant GCM calls).
SetVertexDataArrayCache_t g_cacheSetVertexDataArray[ D3D_MAX_STREAMS ];
// Shadow copies of fragment-program and vertex-program constants.
vec_float4 g_aFPConst[GCM_DS_MAXFPCONST] = {0,};
vec_float4 g_aVPConst[GCM_DS_MAXVPCONST] = {0,};
// Current vertex stream bindings (D3D-style source descriptors).
D3DStreamDesc g_dxGcmVertexStreamSources[D3D_MAX_STREAMS];
// DrawPrimUP buffer usage counters - presumably high-water mark and per-frame usage; verify
uint32 g_UPHigh = 0;
uint32 g_UPFrame;
#ifndef SPU
ALIGN16 uint8 g_aDynECB[GCM_DS_MAXDYNECB] ALIGN16_POST; // Ring buffer of dynamic cmds
uint32 g_nDynECBIdx = 0;
#endif
// Fixed-size per-drawcall data: PPU keeps one slot per ring entry, SPU works on one at a time.
#ifndef SPU
ALIGN128 CGcmDrawState::FixedData gFixedData[GCM_DRAWSTATE_MAX] ALIGN128_POST;
#else
ALIGN128 CGcmDrawState::FixedData gFixedData[1] ALIGN128_POST;
#endif
// Variable-size packed per-drawcall data (constants, commands), same slot scheme as above.
#ifndef SPU
ALIGN128 uint8 gPackData[GCM_DRAWSTATE_MAX][GCM_DS_MAXDATAPERDRAWCALL] ALIGN128_POST;
#else
ALIGN128 uint8 gPackData[1][GCM_DS_MAXDATAPERDRAWCALL] ALIGN128_POST;
#endif
//--------------------------------------------------------------------------------------------------
// DX lookups etc..
//--------------------------------------------------------------------------------------------------
// These tables are auto-generated in dxabstract.cpp, UnpackD3DRSITable()
// They provide renderstate classes and their default values....
uint8 g_d3drs_defvalue_indices[D3DRS_VALUE_LIMIT] =
{ 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0300 | 1, 0300 | 2, 0100 | 3, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0300 | 1, 0300 | 0, 0100 | 4, 0000 | 0, 0000 | 0, 0300 | 1, 0300 | 1, 0000 | 0, 0300 | 2, 0300 | 1, 0300 | 0, 0300 | 5, 0100 | 0, 0300 | 1, 0300 | 0, 0100 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0300 | 0, 0300 | 0, 0300 | 0, 0300 | 6, 0300 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0300 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0300 | 0, 0300 | 4, 0300 | 4, 0300 | 4, 0300 | 7, 0300 | 0, 0300 | 8, 0300 | 8, 0100 | 8, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0000 | 0, 0100 | 0, 0100 | 0, 0100 | 0, 0100 | 0, 0100 | 0, 0100 | 0, 0100 | 0, 0100 | 0, 0200 | 4, 0000 | 0, 0000 | 0, 0100 | 0, 0300 | 0, 0100 | 4, 0100 | 4, 0100 | 0, 0000 | 0, 0100 | 0, 0100 | 3, 0100 | 0, 0100 | 0, 0000 | 0, 0000 | 0, 0100 | 0, 0300 | 0, 0000 | 0, 0100 | 6, 0100 | 6, 0100 | 0, 0100 | 0, 0100 | 6, 0100 | 0, 0100 | 0, 0300 | 4, 0300 | 8, 0100 | 0, 0000 | 0, 0100 | 0, 0100 | 9, 0100 | 0, 0300 | 4, 0000 | 0, 0100 | 0, 0300 | 4, 0100 | 2, 0100 | 4, 0300 | 0, 0300 | 0, 0100 | 0, 0000 | 0, 0100 | 6, 0100 | 6, 0100 | 0, 0100 | 0, 0100 | 6, 0100 | 0, 0100 | 0, 0300 | 0, 0300 | 4, 0300 | 4, 0300 | 4, 0300 | 7, 0300 | 10, 0300 | 10, 0300 | 10, 0100 | 8, 0300 | 0, 0300 | 0, 0000 | 0, 0000 | 0, 0100 | 0, 0100 
| 0, 0100 | 0, 0100 | 0, 0100 | 0, 0100 | 0, 0100 | 0, 0100 | 0, 0100 | 0, 0100 | 3, 0100 | 4, 0100 | 4};
// Palette of distinct render-state default values; entries are selected by the
// low bits of g_d3drs_defvalue_indices above (the leading octal digit appears
// to be a class/flag field - see UnpackD3DRSITable() in dxabstract.cpp).
uint32 g_d3drs_defvalues[11] =
{ 0x0, 0x31415926, 0x3, 0x2, 0x1, 0x7, 0x3F800000, 0x8, 0xFFFFFFFF, 0x42800000, 0xF };
// Maps D3DBLENDOP (1-based: ADD=1, SUBTRACT=2, REVSUBTRACT=3, MIN=4, MAX=5)
// to the CELL_GCM blend equation; slots 0 and 6 are out-of-range padding.
uint16 dxtogl_blendop[7] =
{
	/*invalid*/CELL_GCM_FUNC_ADD,
	CELL_GCM_FUNC_ADD,
	CELL_GCM_FUNC_SUBTRACT,
	CELL_GCM_FUNC_REVERSE_SUBTRACT,
	CELL_GCM_MIN,
	CELL_GCM_MAX,
	/*invalid*/CELL_GCM_FUNC_ADD,
};
// Maps D3DSTENCILOP (1-based) to the CELL_GCM stencil operation;
// slots 0 and 9 are out-of-range padding.
uint32 dxtogl_stencilmode[10] =
{
	/*invalid*/ CELL_GCM_KEEP,
	/*D3DSTENCILOP_KEEP*/ CELL_GCM_KEEP,
	/*D3DSTENCILOP_ZERO*/ CELL_GCM_ZERO,
	/*D3DSTENCILOP_REPLACE*/ CELL_GCM_REPLACE,
	/*D3DSTENCILOP_INCRSAT*/ CELL_GCM_INCR,
	/*D3DSTENCILOP_DECRSAT*/ CELL_GCM_DECR,
	/*D3DSTENCILOP_INVERT*/ CELL_GCM_INVERT,
	/*D3DSTENCILOP_INCR*/ CELL_GCM_INCR_WRAP,
	/*D3DSTENCILOP_DECR*/ CELL_GCM_DECR_WRAP,
	/*invalid*/ CELL_GCM_KEEP,
};
// addressing modes
// 1 D3DTADDRESS_WRAP Tile the texture at every integer junction.
// D3DTADDRESS_MIRROR Similar to D3DTADDRESS_WRAP, except that the texture is flipped at every integer junction.
// 3 D3DTADDRESS_CLAMP Texture coordinates outside the range [0.0, 1.0] are set to the texture color at 0.0 or 1.0, respectively.
// 4 D3DTADDRESS_BORDER Texture coordinates outside the range [0.0, 1.0] are set to the border color.
// D3DTADDRESS_MIRRORONCE Similar to D3DTADDRESS_MIRROR and D3DTADDRESS_CLAMP.
// Takes the absolute value of the texture coordinate (thus, mirroring around 0),
// and then clamps to the maximum value. The most common usage is for volume textures,
// where support for the full D3DTADDRESS_MIRRORONCE texture-addressing mode is not
// necessary, but the data is symmetric around the one axis.
// Maps D3DTEXTUREADDRESS (1-based, see comment block above) to the
// CELL_GCM texture wrap mode; slot 0 is out-of-range padding.
uint8 dxtogl_addressMode[6] =
{
	CELL_GCM_TEXTURE_WRAP, // no zero entry
	CELL_GCM_TEXTURE_WRAP, // from D3DTADDRESS_WRAP
	CELL_GCM_TEXTURE_MIRROR, // from D3DTADDRESS_MIRROR
	CELL_GCM_TEXTURE_CLAMP_TO_EDGE, // from D3DTADDRESS_CLAMP
	CELL_GCM_TEXTURE_BORDER, // from D3DTADDRESS_BORDER
	CELL_GCM_TEXTURE_MIRROR_ONCE_BORDER, // no D3DTADDRESS_MIRRORONCE support
};
// Maps a D3D max-anisotropy value (halved, i.e. index = maxAniso/2) to the
// nearest CELL_GCM aniso enum; everything >= 16 clamps to MAX_ANISO_16.
uint8 dxtogl_anisoIndexHalf[32] = // indexed by [ dxsamp->maxAniso / 2 ]
{
	CELL_GCM_TEXTURE_MAX_ANISO_1, // 0-1
	CELL_GCM_TEXTURE_MAX_ANISO_2, // 2-3
	CELL_GCM_TEXTURE_MAX_ANISO_4, // 4-5
	CELL_GCM_TEXTURE_MAX_ANISO_6, // 6-7
	CELL_GCM_TEXTURE_MAX_ANISO_8, // 8-9
	CELL_GCM_TEXTURE_MAX_ANISO_10, // 10-11
	CELL_GCM_TEXTURE_MAX_ANISO_12, // 12-13
	CELL_GCM_TEXTURE_MAX_ANISO_16, // 14-15
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 16
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 18
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 20
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 22
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 24
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 26
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 28
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 30
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 32
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 34
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 36
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 38
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 40
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 42
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 44
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 46
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 48
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 50
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 52
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 54
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 56
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 58
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 60
	CELL_GCM_TEXTURE_MAX_ANISO_16, // ... rest >= 62
};
// Combined min/mip filter selection: D3D expresses min and mip filters
// separately; GCM wants a single combined enum.
uint8 dxtogl_minFilter[4][4] = // indexed by _D3DTEXTUREFILTERTYPE on both axes: [row is min filter][col is mip filter].
{
	/* mip filter ---------------> D3DTEXF_NONE D3DTEXF_POINT D3DTEXF_LINEAR (D3DTEXF_ANISOTROPIC not applicable to mip filter) */
	/* min = D3DTEXF_NONE */ { CELL_GCM_TEXTURE_NEAREST, CELL_GCM_TEXTURE_NEAREST_NEAREST, CELL_GCM_TEXTURE_NEAREST_LINEAR, CELL_GCM_TEXTURE_NEAREST }, // D3DTEXF_NONE we just treat like POINT
	/* min = D3DTEXF_POINT */ { CELL_GCM_TEXTURE_NEAREST, CELL_GCM_TEXTURE_NEAREST_NEAREST, CELL_GCM_TEXTURE_NEAREST_LINEAR, CELL_GCM_TEXTURE_NEAREST },
	/* min = D3DTEXF_LINEAR */ { CELL_GCM_TEXTURE_LINEAR, CELL_GCM_TEXTURE_LINEAR_NEAREST, CELL_GCM_TEXTURE_LINEAR_LINEAR, CELL_GCM_TEXTURE_NEAREST },
	/* min = D3DTEXF_ANISOTROPIC */ { CELL_GCM_TEXTURE_LINEAR, CELL_GCM_TEXTURE_LINEAR_NEAREST, CELL_GCM_TEXTURE_LINEAR_LINEAR, CELL_GCM_TEXTURE_NEAREST }, // no diff from prior row, set maxAniso to effect the sampling
};
// Magnification filter mapping (aniso handled via maxAniso, not the filter enum).
uint8 dxtogl_magFilter[4] = // indexed by _D3DTEXTUREFILTERTYPE
{
	CELL_GCM_TEXTURE_NEAREST, // D3DTEXF_NONE not applicable to mag filter but we handle it like POINT (mat_showmiplevels hits this)
	CELL_GCM_TEXTURE_NEAREST, // D3DTEXF_POINT
	CELL_GCM_TEXTURE_LINEAR, // D3DTEXF_LINEAR
	CELL_GCM_TEXTURE_LINEAR, // D3DTEXF_ANISOTROPIC (aniso will be driven by setting maxAniso, not by a GL filter mode)
};
//--------------------------------------------------------------------------------------------------
// Send to SPU
//--------------------------------------------------------------------------------------------------
#ifndef SPU
// Nonzero while an SPU drawstate batch is in flight (its mailbox reply not yet read).
int gSpuJobIssued = 0;
// First ring index of the batch currently being accumulated, and the number of states in it.
uint32 gSpuStartIdx = 0;
uint32 gSpuCount = 0;
//--------------------------------------------------------------------------------------------------
// SPU DRAW CODE
//--------------------------------------------------------------------------------------------------
#if !PPU_DRAW
// Hands the current drawstate over to the SPU draw task.
// Drawstates accumulate in the gGcmDrawState ring and are kicked in batches:
// the SPU is only signalled every 4 states, or immediately on CmdEndFrame
// (which also waits synchronously for the SPU to finish).
// On return, gpGcmDrawState points at the next ring slot, pre-seeded with the
// persistent fields (shader data pointers, constants, backbuffer size) and
// with all per-drawcall state cleared.
void CGcmDrawState::SendToSpu()
{
	SpuTaskHandle *pTask = &g_ps3gcmGlobalState.m_spuHandle;
	// Get this drawcall indx and the next
	uint32 idx = gpGcmDrawState - gGcmDrawState;
	uint32 nextidx = (idx + 1) % GCM_DRAWSTATE_MAX;
	gSpuCount ++;
	// Move gpGcmDrawState to the next set of Data
	CGcmDrawState* pPrevDrawState = gpGcmDrawState;
	gpGcmDrawState = &gGcmDrawState[nextidx];
	// Carry persistent state forward into the new slot...
	gpGcmDrawState->m_shaderVxConstants = pPrevDrawState->m_shaderVxConstants;
	gpGcmDrawState->m_pPixelShaderData = pPrevDrawState->m_pPixelShaderData;
	gpGcmDrawState->m_pVertexShaderData = pPrevDrawState->m_pVertexShaderData;
	gpGcmDrawState->m_nBackBufferSize[0] = pPrevDrawState->m_nBackBufferSize[0];
	gpGcmDrawState->m_nBackBufferSize[1] = pPrevDrawState->m_nBackBufferSize[1];
	// ...and reset everything that is per-drawcall.
	gpGcmDrawState->m_pDataCursor = gpGcmDrawState->m_pData;
	gpGcmDrawState->m_dirtySamplersMask = 0;
	gpGcmDrawState->m_dirtyCachesMask = 0;
	gpGcmDrawState->m_dirtyStatesMask = 0;
	gpGcmDrawState->m_nFreeLabel = 0;
	memset(gpGcmDrawState->m_pFixed->m_aSamplerIdx, 0xff, sizeof(m_pFixed->m_aSamplerIdx));
	gpGcmDrawState->m_pFixed->m_nSampler = 0;
	gpGcmDrawState->m_pFixed->m_nInstanced = 0;
	gpGcmDrawState->m_nNumECB = 0;
	memset(gpGcmDrawState->m_aECB, 0, sizeof(m_aECB));
	// Keep batching until we have 4 states, unless this is the end of the frame.
	if ( (gSpuCount < 4) && (m_cmd != CmdEndFrame) ) return;
	// Send the state(s) to the SPU
	// Wait on previous drawcall
	if (gSpuJobIssued)
	{
		// The SPU mails back the EA just past the FIFO bytes it wrote.
		uint32 fifoPosn;
		gSpuMgr.ReadMailbox(pTask, &fifoPosn);
		gpGcmContext->current = (uint32*)fifoPosn;
	}
	// Makesure we have 16K at least, per drawcall (we issue 4 calls at a time)
	cellGcmReserveMethodSizeInline(gpGcmContext, (GCM_DS_FIFOPERDRAW*GCM_NUMDRAWCALLS_SPU)/4); // 16K per draw call, /4 because api takes wordcount
	// Makesure FIFO is on a 16B boundary
	while (uintp(gpGcmContext->current) & 0xf)
	{
		*gpGcmContext->current = 0;
		gpGcmContext->current++;
	}
	// Build count and startidx parameter to send to SPU
	uint32 mailboxparam = (gSpuCount<<16) | gSpuStartIdx;
	//Send this drawstate
	m_eaOutputFIFO = (uint32)gpGcmContext->current;
	__asm ( "eieio" );	// make the stores above visible before the mailbox write signals the SPU
	gSpuMgr.WriteMailbox(pTask, mailboxparam);
	gSpuJobIssued = 1;
	// If it's an endframe, wait for result now
	// comment out this if to always wait for the dma to come back
	if (m_cmd == CmdEndFrame)
	{
		uint32 fifoPosn;
		gSpuMgr.ReadMailbox(pTask, &fifoPosn);
		gpGcmContext->current = (uint32*)fifoPosn;
		gSpuJobIssued = 0;
	}
	gSpuStartIdx = nextidx;
	gSpuCount = 0;
}
#else // PPU_DRAW.....
//--------------------------------------------------------------------------------------------------
// Draw on PPU
//--------------------------------------------------------------------------------------------------
// PPU fallback (compiled only when PPU_DRAW is enabled): executes the
// drawstate command directly on the PPU instead of handing it to the SPU task.
// NOTE(review): gPackData1/gPackData2 and gFixedData1/gFixedData2 are not
// defined in the visible (SPU-draw) configuration, which defines the
// gPackData[]/gFixedData[] arrays instead - confirm this path still builds
// before enabling PPU_DRAW.
void CGcmDrawState::SendToSpu()
{
	// Makesure we have 16K at least
	cellGcmReserveMethodSizeInline(gpGcmContext, GCM_DS_FIFOPERDRAW/4); // 16K per draw call
	// Makesure FIFO is on a 16B boundary
	while (uintp(gpGcmContext->current) & 0xf)
	{
		*gpGcmContext->current = 0;
		gpGcmContext->current++;
	}
	// Process cmd on PPU
	switch (m_cmd)
	{
	case CmdCommitStates:
	case CmdEndFrame:
		// Housekeeping: free-label writeback, optional RSX reset, constant
		// zeroing, then unpack packed data and commit render states.
		if (m_nFreeLabel) UnpackSetWriteBackEndLabel(GCM_LABEL_MEMORY_FREE, m_nFreeLabel);
		if ( m_dirtyStatesMask & kDirtyResetRsx) UnpackResetRsxState();
		if (m_dirtyStatesMask & kDirtyZeroAllPSConsts) ZeroFPConsts();
		if (m_dirtyStatesMask & kDirtyZeroAllVSConsts) ZeroVPConsts();
		UnpackData(); // Pulls out pixel shader consts and sets vertex shader consts
		CommitRenderStates();
		break;
	case CmdDrawPrim:
	{
		gpGcmDrawState->CommitAll((IDirect3DVertexDeclaration9 *)m_param[0], m_param[1]);
		// Draw
		GCM_FUNC( cellGcmSetDrawIndexArray,
			m_param[2], m_param[5],
			CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16, CELL_GCM_LOCATION_LOCAL,
			m_param[3] );
	}
	break;
	case CmdDrawPrimUP:
	{
		// Draw-up vertex data was pre-written to a command buffer segment
		// that is invoked via a CALL command (m_param[1] is its offset).
		D3DStreamDesc &dsd = g_dxGcmVertexStreamSources[0];
		dsd.m_offset = 0;
		dsd.m_stride = m_param[2];
		dsd.m_vtxBuffer = ( IDirect3DVertexBuffer9 * )( uintp )1; // invalid pointer, but non-NULL to signal it's a real vertex buffer;
		dsd.m_nLocalBufferOffset = 0;
		gpGcmDrawState->CommitAll((IDirect3DVertexDeclaration9 *)m_param[0], 0);
		GCM_FUNC(cellGcmSetCallCommand, m_param[1]);
	}
	break;
	}
	// Flip to the other set of Data
	if (gpGcmDrawState->m_pData == gPackData1)
	{
		gpGcmDrawState->m_pData = gPackData2;
		gpGcmDrawState->m_pFixed = &gFixedData2;
	}
	else
	{
		gpGcmDrawState->m_pData = gPackData1;
		gpGcmDrawState->m_pFixed = &gFixedData1;
	}
	gpGcmDrawState->m_pDataCursor = gpGcmDrawState->m_pData;
	// Clear per-drawcall state for the next use of this slot.
	m_dirtySamplersMask = 0;
	m_dirtyCachesMask = 0;
	m_dirtyStatesMask = 0;
	m_nFreeLabel = 0;
	memset(m_pFixed->m_aSamplerIdx, 0xff, sizeof(m_pFixed->m_aSamplerIdx));
	m_pFixed->m_nSampler = 0;
	m_pFixed->m_nInstanced = 0;
	m_nNumECB = 0;
	memset(m_aECB, 0, sizeof(m_aECB));
}
#endif // ndef SPU
#endif
//--------------------------------------------------------------------------------------------------
// test func to try to find corrupted ECBs
//--------------------------------------------------------------------------------------------------
// Debug helper: walks a compiled ECB (shader API command buffer) without
// executing it, to detect corruption. Breaks into the debugger on
// out-of-range opcodes and on textures whose local-memory offset has
// unexpected low bits set. CBCMD_JSR targets are validated recursively;
// the walk ends at the outermost CBCMD_END.
//
// Fixes vs. previous version:
//  - an unknown opcode now flags and stops the walk; previously release
//    builds had no default case at all, so pCmdBuf never advanced and the
//    loop spun forever (debug builds Assert'ed, then spun the same way)
//  - removed unused locals (pStart, pCmd, pLastCmd) and the large stretches
//    of commented-out interpreter code that were irrelevant to validation
// Every accepted opcode advances pCmdBuf by exactly the same amount as the
// real interpreter, so a clean walk proves the buffer is well-formed.
void CGcmDrawState::TestCommandBuffer( uint8 *pCmdBuf )
{
	uint8 *pReturnStack[20];
	uint8 **pSP = &pReturnStack[ARRAYSIZE(pReturnStack)];
	for(;;)
	{
		int nCmd = GetData<int>( pCmdBuf );
		// Any opcode beyond the last known command means the buffer is trashed
		if ( nCmd > CBCMD_SET_VERTEX_SHADER_NEARZFARZ_STATE ) DebuggerBreak();
		switch( nCmd )
		{
		case CBCMD_END:
			{
				if ( pSP == &pReturnStack[ARRAYSIZE(pReturnStack)] )
					return;	// outermost END: buffer walked cleanly
				else
				{
					// pop pc (only reachable if a return address was pushed)
					pCmdBuf = *( pSP ++ );
					break;
				}
			}
		case CBCMD_JUMP:
			pCmdBuf = GetData<uint8 *>( pCmdBuf + sizeof( int ) );
			break;
		case CBCMD_JSR:
			{
				Assert( pSP > &(pReturnStack[0] ) );
				// Validate the subroutine by recursing instead of pushing a
				// return address, then continue past the JSR operand.
				TestCommandBuffer( GetData<uint8 *>( pCmdBuf + sizeof( int ) ) );
				pCmdBuf = pCmdBuf + sizeof( int ) + sizeof( uint8 *);
				break;
			}
		case CBCMD_SET_PIXEL_SHADER_FLOAT_CONST:
			{
				// Skip opcode, start-const, count, then count vec4s of payload
				int nNumConsts = GetData<int>( pCmdBuf + 2 * sizeof( int ) );
				pCmdBuf += nNumConsts * 4 * sizeof( float ) + 3 * sizeof( int );
				break;
			}
		case CBCMD_SETPIXELSHADERFOGPARAMS:
			{
				// Unsupported on this backend; Error() presumably does not return
				Error("Pixel Shader Fog params not supported\n");
				break;
			}
		case CBCMD_STORE_EYE_POS_IN_PSCONST:
			{
				pCmdBuf += 2 * sizeof( int ) + sizeof( float );
				break;
			}
		case CBCMD_SET_DEPTH_FEATHERING_CONST:
			{
				pCmdBuf += 2 * sizeof( int ) + sizeof( float );
				break;
			}
		case CBCMD_SET_VERTEX_SHADER_FLOAT_CONST:
			{
				// Same layout as the pixel-shader const command above
				int nNumConsts = GetData<int>( pCmdBuf + 2 * sizeof( int ) );
				pCmdBuf += nNumConsts * 4 * sizeof( float ) + 3 * sizeof( int );
				break;
			}
		case CBCMD_BIND_PS3_TEXTURE:
			{
				CPs3BindTexture_t tex = GetData<CPs3BindTexture_t> (pCmdBuf + sizeof( int ));
				// NOTE(review): 0x7e mask preserved from the original - presumably
				// a 128B-alignment check that deliberately ignores bit 0; confirm
				if (tex.m_pLmBlock->Offset() & 0x7e) DebuggerBreak();
				pCmdBuf += sizeof(int) + sizeof(tex);
				break;
			}
		case CBCMD_BIND_PS3_STANDARD_TEXTURE:
			{
				CPs3BindTexture_t tex = GetData<CPs3BindTexture_t> (pCmdBuf + sizeof( int ));
				if (m_pFixed->m_nInstanced)
				{
					// Instanced rendering may substitute the standard texture,
					// but the bind flags and sampler come from the command stream.
					uint32 nBindFlags = tex.m_nBindFlags;
					uint32 nSampler = tex.m_sampler;
					switch (tex.m_boundStd)
					{
					case TEXTURE_LOCAL_ENV_CUBEMAP:
						if (m_pFixed->m_nInstanced & GCM_DS_INST_ENVMAP) tex = m_pFixed->m_instanceEnvCubemap;
						break;
					case TEXTURE_LIGHTMAP:
						if (m_pFixed->m_nInstanced & GCM_DS_INST_LIGHTMAP) tex = m_pFixed->m_instanceLightmap;
						break;
					case TEXTURE_PAINT:
						if (m_pFixed->m_nInstanced & GCM_DS_INST_PAINTMAP) tex = m_pFixed->m_instancePaintmap;
						break;
					}
					tex.m_nBindFlags = nBindFlags;
					tex.m_sampler = nSampler;
				}
				// Test texture
				if (tex.m_pLmBlock->Offset() & 0x7e) DebuggerBreak();
				pCmdBuf += sizeof(int) + sizeof(tex);
				break;
			}
		case CBCMD_PS3TEX:
			{
				pCmdBuf += sizeof(int) + (CBCMD_MAX_PS3TEX*sizeof(int));
				break;
			}
		case CBCMD_LENGTH:
			{
				pCmdBuf += sizeof(int) *2 ;
				break;
			}
		case CBCMD_SET_PSHINDEX:
			{
				Error("PSHINDEX Not Supported\n");
				break;
			}
		case CBCMD_SET_VSHINDEX:
			{
				pCmdBuf += 2 * sizeof( int );
				Error("VSHINDEX Not Supported\n");
				break;
			}
		case CBCMD_SET_VERTEX_SHADER_FLASHLIGHT_STATE:
			{
				// Payload skipped; flashlight state is handled elsewhere
				pCmdBuf += 2 * sizeof( int );
				break;
			}
		case CBCMD_SET_VERTEX_SHADER_NEARZFARZ_STATE:
			{
				Error("SetVertexShaderNearAndFarZ NOt SUPPORTED\n");
				pCmdBuf += 2 * sizeof( int );
				break;
			}
		case CBCMD_SET_PIXEL_SHADER_FLASHLIGHT_STATE:
			{
				// 12-int payload (samplers, constant registers, flags); skipped here
				pCmdBuf += 12 * sizeof( int );
				break;
			}
		case CBCMD_SET_PIXEL_SHADER_UBERLIGHT_STATE:
			{
				pCmdBuf += 7 * sizeof( int );
				Error("Uberlight state unsupported\n");
				break;
			}
		default:
			// Unknown opcode: previously this fell through without advancing
			// pCmdBuf and spun forever. Flag the corruption and stop the walk.
			Assert(0);
			DebuggerBreak();
			return;
		}
	}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,56 @@
//================ Copyright (c) 1996-2010 Valve Corporation. All Rights Reserved. =================
#ifndef PS3GCMFUNC_H
#define PS3GCMFUNC_H
// this is the buffer that all PPU GCM functions assume is the normal command buffer,
// but it is not in IO-mapped memory and it's the SPU that picks up and submits it to RSX.
// it's a level of indirection necessary to interleave SPU and PPU calls to GCM
#define GCM_CTX gCellGcmCurrentContext
#if GCM_CTX_UNSAFE_MODE
#error "This mode is not supported any more. Use SPU draw mode."
#endif
// Reserve callback used by GCM_CTX_RESERVE to guarantee nCount words of space in the context.
extern int32_t SpuGcmCommandBufferReserveCallback( struct CellGcmContextData *context, uint32_t nCount );
#define GCM_CTX_RESERVE( WORDS ) SpuGcmCommandBufferReserveCallback( GCM_CTX, WORDS )
// Call a cellGcm* function directly on the indirection context (out-of-line form).
#define GCM_FUNC_NOINLINE( GCM_FUNCTION, ...) GCM_FUNCTION( GCM_CTX, ##__VA_ARGS__ )
// Perf markers compile out entirely in _CERT builds.
#ifdef _CERT
#define GCM_PERF_RANGE( NAME )
#define GCM_PERF_PUSH_MARKER( NAME )
#define GCM_PERF_POP_MARKER( )
#define GCM_PERF_MARKER( NAME )
#else
// RAII helper: pushes a PerfMon marker on construction, pops it on scope exit.
class CGcmPerfAutoRange
{
public:
	CGcmPerfAutoRange( const char * pName ){ GCM_FUNC_NOINLINE( cellGcmSetPerfMonPushMarker, pName ); }
	~CGcmPerfAutoRange( ){ GCM_FUNC_NOINLINE( cellGcmSetPerfMonPopMarker ); }
};
#define GCM_PERF_RANGE( NAME ) CGcmPerfAutoRange _gcmAutoRange( NAME )
#define GCM_PERF_PUSH_MARKER( NAME ) GCM_FUNC_NOINLINE( cellGcmSetPerfMonPushMarker, NAME )
#define GCM_PERF_POP_MARKER( ) GCM_FUNC_NOINLINE( cellGcmSetPerfMonPopMarker )
#define GCM_PERF_MARKER( NAME ) GCM_FUNC_NOINLINE( cellGcmSetPerfMonMarker, ( NAME ) )
#endif
// Emit a GCM method safely: measure its exact word count, reserve that much
// space in the context, then emit with the unchecked inline variant.
#define GCM_FUNC( GCM_FUNCTION, ...) \
{ \
uint nReserveWords = GCM_FUNCTION ## MeasureSizeInline( 0, ##__VA_ARGS__ ); \
GCM_CTX_RESERVE( nReserveWords ); \
GCM_FUNCTION ## UnsafeInline( GCM_CTX, ##__VA_ARGS__ ); \
}
extern void SpuGcmCommandBufferFlush();
// NOTE(review): this macro expands to a function *declaration*, not a call,
// so a GCM_CTX_FLUSH_CHECKPOINT() statement is a no-op - presumably flush
// checkpoints are deliberately disabled; confirm before relying on it.
#define GCM_CTX_FLUSH_CHECKPOINT() void SpuGcmCommandBufferFlush()
// Poison direct cellGcmFlush usage: callers must go through the global-state flush.
#define cellGcmFlush must_use_____g_ps3gcmGlobalState_CmdBufferFlush
#endif

View File

@@ -0,0 +1,36 @@
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
// Labels etc..
//
//==================================================================================================
#ifndef INCLUDED_GCMLABELS_H
#define INCLUDED_GCMLABELS_H
// RSX label indices (sync words in label memory). Valid range is 0..255;
// GCM reserves 0..63 for its own use.
enum GcmLabelEnum_t
{
	GCM_LABEL_QUERY_FIRST = 64, // GCM reserves the first 64 labels, do not use them
	GCM_LABEL_QUERY_LAST = GCM_LABEL_QUERY_FIRST + 99, // the last query label, inclusive (100 query labels total)
	GCM_LABEL_FPPATCH_RING_SEG = 252,
	GCM_LABEL_CALL_CMD_RING_SEG = 253, // Ring command buffer for DrawPrimUP and similar
	GCM_LABEL_FLIP_CONTROL = 254,
	GCM_LABEL_MEMORY_FREE = 255 // 255 is the very last possible index of a label
};
// RSX report slot indices.
enum GcmReportEnum_t
{
	// Used for occlusion queries
	GCM_REPORT_QUERY_FIRST = 0,
	GCM_REPORT_QUERY_LAST = GCM_REPORT_QUERY_FIRST + 512,
	// Used for RSX perf monitoring ... Four timestamps. Start and finish of this frame. Start and finish of previous frame
	GCM_REPORT_TIMESTAMP_FRAME_FIRST,
	GCM_REPORT_TIMESTAMP_FRAME_LAST = GCM_REPORT_TIMESTAMP_FRAME_FIRST + 3,
	// Used for Zcull stats
	GCM_REPORT_ZCULL_STATS_0,
	GCM_REPORT_ZCULL_STATS_1,
};
#endif // INCLUDED_GCMLABELS_H

View File

@@ -0,0 +1,848 @@
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
// Gcm renderer state and util functions
//
//==================================================================================================
#ifndef SPU
#define CELL_GCM_MEMCPY memcpy // PPU SNC has no such intrinsic
#endif
#include "sys/memory.h"
#include "sysutil/sysutil_sysparam.h"
#include "cell/sysmodule.h"
#include "tier0/platform.h"
#include "tier0/dbg.h"
#include "tier1/utlbuffer.h"
#include "cell/gcm.h"
#include "gcmconfig.h"
#include "ps3gcmmemory.h"
#include "gcmstate.h"
#include "gcmlabels.h"
#include "gcmdrawstate.h"
#include "ps3/ps3_helpers.h"
#include <cell/gem.h> // PS3 move controller lib
#include "inputsystem/iinputsystem.h"
#include "memdbgon.h"
//--------------------------------------------------------------------------------------------------
// Globals, GCM context, flip control init prototype
//--------------------------------------------------------------------------------------------------
// Global GCM/RSX state singleton (video mode, IO memory, command buffers, SPU task handle).
ALIGN128 CPs3gcmGlobalState g_ps3gcmGlobalState ALIGN128_POST;
// The indirection GCM context the PPU writes into; the SPU picks it up and submits to RSX.
ALIGN16 CellGcmContextData gGcmContext ALIGN16_POST;
CellGcmContextData* gpGcmContext;
// Secondary context - presumably for CALL-style ring command buffers; verify against usage.
CellGcmContextData gCallContext;
CellGcmContextData* gpCallContext = &gCallContext;
static void Gcm_InitFlipControl(void);
static volatile uint32_t *s_label_call_cmd_ring_seg; // pointer to the call cmd label
volatile uint32_t *g_label_fppatch_ring_seg; // FP patch ring-segment label
//--------------------------------------------------------------------------------------------------
// Empty Ps
//--------------------------------------------------------------------------------------------------
// Precompiled CgBinaryProgram blob for a minimal "empty" pixel shader; Init()
// points m_pShaderPsEmpty at it and copies its ucode into local memory.
// (The bytes 0x43 0x4F 0x4C 0x4F 0x52 are the parameter name string "COLOR".)
// NOTE(review): binary dump - do not edit by hand.
uint8 g_dataShaderPsEmpty[] = {
0x00, 0x00, 0x1B, 0x5C, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00, 0x01
, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x80
, 0x00, 0x00, 0x04, 0x18, 0x00, 0x00, 0x0A, 0xC5, 0x00, 0x00, 0x10, 0x05, 0xFF, 0xFF, 0xFF, 0xFF
, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50
, 0x00, 0x00, 0x10, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00
, 0x43, 0x4F, 0x4C, 0x4F, 0x52, 0x00, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF
, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
, 0x1E, 0x7E, 0x7E, 0x00, 0xC8, 0x00, 0x1C, 0x9D, 0xC8, 0x00, 0x00, 0x01, 0xC8, 0x00, 0x00, 0x01
, 0x1E, 0x01, 0x01, 0x00, 0x28, 0x02, 0x1C, 0x9C, 0xC8, 0x00, 0x00, 0x01, 0xC8, 0x00, 0x00, 0x01
, 0x00, 0x00, 0x3F, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
//--------------------------------------------------------------------------------------------------
// Global GCM state class
//
// Global state, command buffers, RSX draw display buffers etc etc
//--------------------------------------------------------------------------------------------------

// One-time initialization of the global GCM state. Order matters:
// SPU task -> video output -> IO buffers -> libgcm init -> local-memory allocator ->
// display buffers -> empty pixel shader -> flip control -> hand this object to the SPU.
// Returns CELL_OK on success or the first cell error code encountered.
int32 CPs3gcmGlobalState::Init()
{
	MEM_ALLOC_CREDIT_( "GCM INIT" );

	Msg(">>>> Sizeof(CGcmDrawStateDma) %d \n", DRAWSTATE_SIZEOFDMA);
	Msg(">>>> Sizeof(CGcmDrawState) %d \n", sizeof(CGcmDrawState));

	// Create Raw SPU task for renderer acceleration
	gSpuMgr.Init(1);
	gSpuMgr.CreateSpuTask("rawspu_gcmdraw_spu.self", &m_spuHandle);

	// Default flip cap. NOTE(review): the original comment said "Default to 60Hz" but the
	// value stored is 30 (m_flipMode holds 30 or 60) — confirm which default is intended.
	m_flipMode = 30;

	// Video : display res, video buffer, gamma, RGB colour range
	if( int nError= InitVideo() )
		return nError;

	// Alloc IO memory, Set address, size of main memory pool for RSX
	CreateIoBuffers();

	// Init GCM : Map IO memory, Create command buffers
	if( int nError = InitGcm() )
		return nError;

	// Retrieve RSX local memory config
	CellGcmConfig rsxConfig;
	cellGcmGetConfiguration( &rsxConfig );
	m_pLocalBaseAddress = rsxConfig.localAddress;
	m_nLocalSize = rsxConfig.localSize;
	cellGcmAddressToOffset( m_pLocalBaseAddress, &m_nLocalBaseOffset );
	Assert( m_nLocalBaseOffset == 0 );

	// Init local memory mgr
	Ps3gcmLocalMemoryAllocator_Init();

	// Create display buffers etc..
	CreateRsxBuffers();

	// Create Empty PS: copy the precompiled ucode into local memory and derive the
	// attribute input mask plus the SET_SHADER_CONTROL word needed to run it.
	m_pShaderPsEmpty = reinterpret_cast< CgBinaryProgram * >( g_dataShaderPsEmpty );
	m_pShaderPsEmptyBuffer.Alloc( kAllocPs3GcmShader, m_pShaderPsEmpty->ucodeSize );
	V_memcpy( m_pShaderPsEmptyBuffer.DataInLocalMemory(), ( (char*)m_pShaderPsEmpty ) + m_pShaderPsEmpty->ucode, m_pShaderPsEmpty->ucodeSize );

	CgBinaryFragmentProgram *pCgFragmentProgram = ( CgBinaryFragmentProgram * )( uintp( m_pShaderPsEmpty ) + m_pShaderPsEmpty->program );
	m_nPsEmptyAttributeInputMask = pCgFragmentProgram->attributeInputMask;

	uint registerCount = pCgFragmentProgram->registerCount;
	// NOTE: actual register count can be modified by specifying an artificial e.g. PS3REGCOUNT48 static combo to force it to 48
	Assert( registerCount <= 48 );
	if (registerCount < 2)
	{
		// register count must be [2, 48]
		registerCount = 2;
	}

	// Build the shader-control word from the fragment program's properties
	uint8_t controlTxp = CELL_GCM_FALSE;
	uint32 shCtrl0 = ( CELL_GCM_COMMAND_CAST( controlTxp ) << CELL_GCM_SHIFT_SET_SHADER_CONTROL_CONTROL_TXP )
		& CELL_GCM_MASK_SET_SHADER_CONTROL_CONTROL_TXP;
	shCtrl0 |= ( 1<<10 ) | ( registerCount << 24 );
	shCtrl0 |= pCgFragmentProgram->depthReplace ? 0xE : 0x0;
	shCtrl0 |= pCgFragmentProgram->outputFromH0 ? 0x00 : 0x40;
	shCtrl0 |= pCgFragmentProgram->pixelKill ? 0x80 : 0x00;
	m_nPsEmptyShaderControl0 = shCtrl0;

	// Init flip control
	m_fastFlip = 0;
	Gcm_InitFlipControl();

	// Address of draw states
	m_eaDrawStates = uintp(gGcmDrawState);

	// Give SPU program this class
	gSpuMgr.WriteMailbox(&m_spuHandle, uintp(this));

	return CELL_OK;
}
// Allocates the main-memory block that will later be mapped into the RSX IO address space
// (the mapping happens in InitGcm) and carves it into sub-regions:
//   [ default cmd buffer | call cmd buffer | main memory pool | fp patch buffers ]
// Sizes come from the GCM_* literals in gcmstate.h and must sum to GCM_IOSIZE.
void CPs3gcmGlobalState::CreateIoBuffers()
{
	m_nIoSize = GCM_IOSIZE;

	// sys_memory_allocate with 1MB pages requires an MB-aligned size
	if ((m_nIoSize & 0xFFFFF) != 0) // MB aligned
	{
		Error("No MB alignment %x\n\n", m_nIoSize);
	}

	// Try to allocate main memory that will be mapped to IO address space
	// Actually mapped in in GcmInit, once gcm is going
	sys_addr_t pIoAddress = NULL;
	int nError = sys_memory_allocate( m_nIoSize, SYS_MEMORY_PAGE_SIZE_1M, &pIoAddress );
	if ( CELL_OK != nError )
	{
		Error( "sys_memory_allocate failed to allocate %d bytes (err: %d)\n", m_nIoSize, nError );
	}
	m_pIoAddress = (void *)pIoAddress;
	Msg( "======== GCM IO memory allocated @0x%p size = %d MB ========\n", m_pIoAddress, m_nIoSize / 1024 / 1024 );

	// Call command buffer sits immediately after the default command buffer
	m_pCallCmdBuffer = (void*)(uintp(pIoAddress) + GCM_DEFCMDBUFFSIZE);

	// RSX main memory pool buffer
	m_nRsxMainMemoryPoolBufferSize = GCM_MAINPOOLSIZE;
	m_pRsxMainMemoryPoolBuffer = (void*)(uintp(pIoAddress) + GCM_DEFCMDBUFFSIZE + GCM_CALLCMDBUFFSIZE);

	// Patch buffers follow the main pool
	m_pPatchBuff = (uint8*)m_pRsxMainMemoryPoolBuffer + GCM_MAINPOOLSIZE;
}
// Initializes libgcm with the default command buffer (mapping the IO memory allocated in
// CreateIoBuffers), takes a local copy of the gcm context, installs the out-of-space
// callback, and sets up the call-command ring and fragment-program patch ring with their
// RSX labels. Returns CELL_OK or the cellGcmInit error.
int CPs3gcmGlobalState::InitGcm()
{
	int32 result = cellGcmInit( GCM_DEFCMDBUFFSIZE, m_nIoSize, m_pIoAddress );
	if ( result < CELL_OK )
		return result;

	// Work on a local copy of the context and hook our command-buffer-full callback
	gGcmContext = *gCellGcmCurrentContext;
	gpGcmContext = &gGcmContext;
	gpGcmContext->callback = CmdBufferFull;

	// Set the flip mode etc...

	// Get the offset delta (add this to an effective address to get its IO offset)
	cellGcmAddressToOffset( m_pIoAddress, &m_nIoOffsetDelta );
	m_nIoOffsetDelta -= uintp( m_pIoAddress );

	// Setup call cmd buffer: ring state and the label the RSX uses to report its segment
	m_nCallCmdBufferoffset = uintp(m_pCallCmdBuffer) + m_nIoOffsetDelta;
	m_nCallWritePos = 0;
	m_nCallReadSegment = 0;
	s_label_call_cmd_ring_seg = cellGcmGetLabelAddress(GCM_LABEL_CALL_CMD_RING_SEG);
	*s_label_call_cmd_ring_seg = 0;

	// Setup Patch Buffers
	m_nPatchIdx = 0;
	m_nPatchReadSeg = 0;
	g_label_fppatch_ring_seg = cellGcmGetLabelAddress(GCM_LABEL_FPPATCH_RING_SEG);
	*g_label_fppatch_ring_seg = 0;

	return CELL_OK;
}
// Configures the PS3 video output: queries the current display mode, chooses the render
// resolution (with -480p / -1080p command-line overrides), programs the video output and
// render pitch, sets display gamma, and logs the RGB output range.
// Returns CELL_OK or the first cell error encountered.
int CPs3gcmGlobalState::InitVideo()
{
	//////////////////////////////////////////////////////////////////////////
	//
	// Initialize m_display
	//
	CellVideoOutState videoOutState;
	int result = cellVideoOutGetState( CELL_VIDEO_OUT_PRIMARY, 0, &videoOutState);
	if ( result < CELL_OK )
		return result;

	CellVideoOutResolution resolution;
	result = cellVideoOutGetResolution( videoOutState.displayMode.resolutionId, &resolution );
	if ( result < CELL_OK )
		return result;

	// Always output scanout in system m_display resolution
	m_nRenderSize[0] = resolution.width;
	m_nRenderSize[1] = resolution.height;

	// Overrides: force 480p when -480p is on the command line; on a 1080-class display
	// without -1080p, render at 720p (1080p output is upsampled from 720p)
	if ( resolution.height >= 720 && CommandLine()->FindParm( "-480p" ) )
	{
		m_nRenderSize[0] = 640;
		m_nRenderSize[1] = 480;
		videoOutState.displayMode.resolutionId = CELL_VIDEO_OUT_RESOLUTION_480;
	}
	else if ( resolution.height >= 1080 && !CommandLine()->FindParm( "-1080p" ) )
	{
		m_nRenderSize[0] = 1280;
		m_nRenderSize[1] = 720;
		videoOutState.displayMode.resolutionId = CELL_VIDEO_OUT_RESOLUTION_720;
	}

	//////////////////////////////////////////////////////////////////////////
	//
	// Set video output
	//
	CellVideoOutConfiguration videocfg;
	memset( &videocfg, 0, sizeof(videocfg) );
	videocfg.resolutionId = videoOutState.displayMode.resolutionId;
	videocfg.format = CELL_VIDEO_OUT_BUFFER_COLOR_FORMAT_X8R8G8B8;
	videocfg.pitch = cellGcmGetTiledPitchSize( m_nRenderSize[0] * 4 );	// tiled pitch for 32bpp
	m_nSurfaceRenderPitch = videocfg.pitch;

	// Configure video output
	result = cellVideoOutConfigure( CELL_VIDEO_OUT_PRIMARY, &videocfg, NULL, 0 );
	if ( result < CELL_OK )
		return result;

	// Get the new video output
	result = cellVideoOutGetState( CELL_VIDEO_OUT_PRIMARY, 0, &videoOutState );
	if ( result < CELL_OK )
		return result;
	m_flRenderAspect = ( videoOutState.displayMode.aspect == CELL_VIDEO_OUT_ASPECT_4_3 ) ? ( 4.0f/3.0f ) : ( 16.0f / 9.0f );

	// Set the gamma to deal with TV's having a darker gamma than computer monitors
	result = cellSysmoduleLoadModule( CELL_SYSMODULE_AVCONF_EXT );
	if ( result == CELL_OK )
	{
		cellVideoOutSetGamma( CELL_VIDEO_OUT_PRIMARY, 2.2f / 2.5f );
	}
	else
	{
		Warning( "***** ERROR calling cellSysmoduleLoadModule( CELL_SYSMODULE_AVCONF_EXT )! Gamma not set!\n" );
		return result;
	}

	// Output video color settings
	CellVideoOutDeviceInfo info;
	cellVideoOutGetDeviceInfo( CELL_VIDEO_OUT_PRIMARY, 0, &info );
	if ( info.rgbOutputRange == CELL_VIDEO_OUT_RGB_OUTPUT_RANGE_LIMITED )
	{
		DevMsg( "***** Video Out - Limited Range (16-235) - Gamma=%d *****\n", info.colorInfo.gamma );
	}
	else
	{
		DevMsg( "***** Video Out - Full Range (0-255) - Gamma=%d *****\n", info.colorInfo.gamma );
	}

	return CELL_OK;
}
// Allocates the automatic display surfaces in RSX local memory: the double-buffered
// scanout color buffers (registered with cellGcmSetDisplayBuffer) and the depth buffer
// (bound to zcull and a tiled region). Assumes InitVideo has set m_nRenderSize and
// m_nSurfaceRenderPitch.
void CPs3gcmGlobalState::CreateRsxBuffers()
{
	//////////////////////////////////////////////////////////////////////////
	//
	// Create automatic display objects
	//
	if( m_nSurfaceRenderPitch != cellGcmGetTiledPitchSize( m_nRenderSize[0] * 4 ) )
	{
		Error("Pre-computed surface render pitch %u != %u = cellGcmGetTiledPitchSize( %u * 4 ) ", m_nSurfaceRenderPitch, cellGcmGetTiledPitchSize( m_nRenderSize[0] * 4 ), m_nRenderSize[0] );
	}

	m_display.surfaceFlipIdx = 0;

	// Color buffers
	for ( int k = 0; k < ARRAYSIZE( m_display.surfaceColor ); ++ k )
	{
		uint32 nRenderSize32bpp = GetRenderSurfaceBytes(); // 32-line vertical alignment required in local memory
		m_display.surfaceColor[k].Alloc( kAllocPs3gcmColorBufferFB, nRenderSize32bpp );
		cellGcmSetDisplayBuffer( k, m_display.surfaceColor[k].Offset(), m_nSurfaceRenderPitch, m_nRenderSize[0], m_nRenderSize[1] );
	}

	// Depth buffer
	{
		// zcull dimensions must be 64-aligned
		uint32 zcullSize[2] = { AlignValue( m_nRenderSize[0], 64 ), AlignValue( m_nRenderSize[1], 64 ) };
		uint32 nDepthPitch = cellGcmGetTiledPitchSize( zcullSize[0] * 4 );
		uint32 uDepthBufferSize32bpp = nDepthPitch * zcullSize[1];
		uDepthBufferSize32bpp = AlignValue( uDepthBufferSize32bpp, PS3GCMALLOCATIONALIGN( kAllocPs3gcmDepthBuffer ) );
		m_display.surfaceDepth.Alloc( kAllocPs3gcmDepthBuffer, uDepthBufferSize32bpp );

		uint32 uiZcullIndex = m_display.surfaceDepth.ZcullMemoryIndex();
		cellGcmBindZcull( uiZcullIndex,
			m_display.surfaceDepth.Offset(),
			zcullSize[0], zcullSize[1],
			m_display.surfaceDepth.ZcullMemoryStart(),
			CELL_GCM_ZCULL_Z24S8,
			CELL_GCM_SURFACE_CENTER_1,
			CELL_GCM_ZCULL_LESS,
			CELL_GCM_ZCULL_LONES,
			CELL_GCM_SCULL_SFUNC_ALWAYS,
			0, 0 // sRef, sMask
			);

		uint32 uiTileIndex = m_display.surfaceDepth.TiledMemoryIndex();
		cellGcmSetTileInfo( uiTileIndex, CELL_GCM_LOCATION_LOCAL, m_display.surfaceDepth.Offset(),
			uDepthBufferSize32bpp, m_nSurfaceRenderPitch, CELL_GCM_COMPMODE_Z32_SEPSTENCIL_REGULAR,
			m_display.surfaceDepth.TiledMemoryTagAreaBase(), // The area base + size/0x10000 will be allocated as the tag area.
			3 ); // Default depth buffer on bank 3
		cellGcmBindTile( uiTileIndex );
	}
}
// Orderly teardown: finish the current frame, wait for the RSX to drain the command
// buffer, unhook the flip/vblank callbacks, then unload the AV-config module loaded
// in InitVideo. Order matters — callbacks must stay valid until the GPU is idle.
void CPs3gcmGlobalState::Shutdown()
{
	gpGcmDrawState->EndFrame();
	gpGcmDrawState->CmdBufferFinish();

	cellGcmSetFlipHandler(NULL);
	cellGcmSetVBlankHandler(NULL);

	cellSysmoduleUnloadModule( CELL_SYSMODULE_AVCONF_EXT );
}
//--------------------------------------------------------------------------------------------------
// DrawPrimUp code...
//--------------------------------------------------------------------------------------------------
// Writes a complete inline-array draw (DrawPrimitiveUP) into the call command buffer and
// returns the effective address of the written commands, so the caller can issue a CALL
// to them from the main command buffer.
//
// The call buffer is a ring of GCM_CALLCMDSEGSIZE segments. Each packet begins by writing
// its segment index to GCM_LABEL_CALL_CMD_RING_SEG; the RSX thereby reports (via
// s_label_call_cmd_ring_seg) which segment it last entered, and the PPU spins below until
// the target segment is free, so unread commands are never overwritten.
//
// Fix: the segment-advance path previously declared a second `uint32 nextSeg`, shadowing
// the outer variable of the same name; the redundant declaration is removed.
uint32 CPs3gcmGlobalState::DrawPrimitiveUP(D3DPRIMITIVETYPE nPrimitiveType,UINT nPrimitiveCount,
										   CONST void *pVertexStreamZeroData, UINT nVertexStreamZeroStride )
{
	// First determine the size required for this call (measure APIs count words)
	uint32 size = 0;
	uint32 nIndexCount = GetGcmCount( nPrimitiveType, nPrimitiveCount );
	uint32 nDataWords = ( nVertexStreamZeroStride * nIndexCount + 3 ) / sizeof( uint32 );	// round vertex bytes up to whole words

	size = cellGcmSetWriteTextureLabelMeasureSize(size, GCM_LABEL_CALL_CMD_RING_SEG, 0 );
	size = cellGcmSetInvalidateVertexCacheMeasureSize(size);
	size = cellGcmSetDrawInlineArrayMeasureSize(size, GetGcmMode( nPrimitiveType ), nDataWords, pVertexStreamZeroData );
	size = cellGcmSetReturnCommandMeasureSize(size);
	size *=4;	// words -> bytes

	// Check whether the packet crosses out of the current segment
	uint32 endPos, nextSeg, readSeg, writeSeg;
	endPos = m_nCallWritePos + size;
	writeSeg = m_nCallWritePos/GCM_CALLCMDSEGSIZE;

	if ((endPos/GCM_CALLCMDSEGSIZE) != writeSeg)
	{
		// Move to the next segment (ring wrap)
		nextSeg = (writeSeg + 1) % (GCM_CALLCMDBUFFSIZE / GCM_CALLCMDSEGSIZE);

		// Wait for RSX to not be in this segment
		readSeg = m_nCallReadSegment;
		if(nextSeg == readSeg) readSeg = *s_label_call_cmd_ring_seg;

		gpGcmDrawState->CmdBufferFlush();

		uint32 spins = 0;
		while(nextSeg == readSeg)
		{
			spins++;
			sys_timer_usleep(60);
			readSeg = *s_label_call_cmd_ring_seg;
		}
		//if (spins > 1) Msg("Spins %d\n", spins);

		// Move to next segment and record new readSeg
		m_nCallWritePos = (nextSeg * GCM_CALLCMDSEGSIZE);
		writeSeg = nextSeg;
		m_nCallReadSegment = readSeg;

		// Msg("new Segment 0x%x\n", m_nCallWritePos);
	}

	uint32 ret = m_nCallWritePos + uintp(m_pCallCmdBuffer);

	// Write the commands through a temporary context spanning the call buffer
	CellGcmContextData context;
	context.begin = (uint32*)m_pCallCmdBuffer;
	context.current = (uint32*)((uint8*)m_pCallCmdBuffer + m_nCallWritePos);
	context.end = (uint32*)((uint8*)m_pCallCmdBuffer + GCM_CALLCMDBUFFSIZE);
	context.callback = 0;

	cellGcmSetWriteTextureLabelUnsafeInline(&context, GCM_LABEL_CALL_CMD_RING_SEG, writeSeg );
	cellGcmSetInvalidateVertexCacheUnsafeInline(&context);
	cellGcmSetDrawInlineArrayUnsafeInline(&context, GetGcmMode( nPrimitiveType ), nDataWords, pVertexStreamZeroData );
	cellGcmSetReturnCommandUnsafeInline(&context);

	// Advance the write cursor past this packet
	m_nCallWritePos += size;

	return ret;
}
//--------------------------------------------------------------------------------------------------
// Command Buffer callback
//--------------------------------------------------------------------------------------------------

// The default command buffer is treated as a ring of SEGSIZE-byte segments.
#define SEGSIZE 0x40000
#define SEGMASK 0x3FFFF

// libgcm out-of-space callback: called when the context write cursor reaches `end`.
// Advances to the next SEGSIZE-aligned region of the default command buffer (wrapping to
// its start), flushes what we have, emits a JUMP into the new region, then spins until the
// RSX GET pointer has left that region before handing it to the context.
int32 CPs3gcmGlobalState::CmdBufferFull(struct CellGcmContextData * pGcmContext, uint32_t size)
{
	// move to next SEGSIZE, and then wrap to start

	// Determine where the next buffer will be
	uint32 nIoAddress = (uint32)g_ps3gcmGlobalState.m_pIoAddress;

	uint32 nextBufferStart = ((uint32)pGcmContext->begin + SEGSIZE) & (~SEGMASK);
	nextBufferStart -= nIoAddress;
	nextBufferStart &= (GCM_DEFCMDBUFFSIZE-1);
	// offset 0 holds the ring start; skip to the second segment in that case
	nextBufferStart = nextBufferStart ? (nextBufferStart + nIoAddress) : (SEGSIZE + nIoAddress);

	// Flush RSX to this point
	cellGcmFlushUnsafeInline(pGcmContext);

	// put jump command to beginning of next buffer
	uint32 nextBufferOffset = nextBufferStart - nIoAddress;
	uint32 nextBufferEndOffset = ((nextBufferOffset + SEGSIZE) & (~SEGMASK)) - 4;	// leave room for the next JUMP
	cellGcmSetJumpCommandUnsafeInline(pGcmContext, nextBufferStart - nIoAddress );

	// get put/get/ref register address
	volatile CellGcmControl* control = cellGcmGetControlRegister();

	int count = 500000;	// (debug) spin counter — only used by the commented-out logging below

	// wait for RSX to finish all commands in next buffer (it's a ring buffer)
	volatile uint32_t get = (volatile uint32_t)control->get;
	while( (get < 0x1000 ) || ( (get >= nextBufferOffset) && (get < nextBufferEndOffset) ) )
	{
		sys_timer_usleep( 30 );
		get = (volatile uint32_t)control->get;

		// count--;
		// if (count < 1)
		// {
		// Msg("\n*****>>>> CmdBufferFull : get 0x%x : nextBufferOffset 0x%x : nextBufferEndOffset 0x%x\n", get, nextBufferOffset, nextBufferEndOffset );
		// count = 1;
		// }
	}

	// Set Command buffer context struct to the reclaimed region
	pGcmContext->begin = (uint32*)nextBufferStart;
	pGcmContext->end = (uint32*)(nextBufferEndOffset + nIoAddress);
	pGcmContext->current = (uint32*)nextBufferStart;

	return CELL_OK;
}
//--------------------------------------------------------------------------------------------------
// Flip Control
//
// Summary :
//
// Label used to cap the framerate. ie label to ensure flips no faster than 1 (60hz) or 2 (30Hz) vblanks.
// PPU blocks if previous flip not complete, so can't run too far ahead
// vblanks and flips noted by callbacks
//--------------------------------------------------------------------------------------------------

// States for the flip-control label and the PPU-side flip state machine.
enum {
	LABEL_FLIP_CONTROL_READY=1,	// when label-before-flip is released
	LABEL_FLIP_CONTROL_WAIT,	// when label-before-flip is not released
	/*
	label_flip_control:
	LABEL_FLIP_CONTROL_WAIT
	=> (when releasing flip by ppu) => LABEL_FLIP_CONTROL_READY,
	=> (when flip is finished by rsx) => LABEL_FLIP_CONTROL_WAIT,
	*/
	FLIP_STATE_V1=1,			// first vblank seen since last flip (30Hz pacing only)
	FLIP_STATE_FLIP_RELEASED,	// PPU has released the flip label; waiting for RSX to flip
	FLIP_STATE_FLIPPED,			// RSX completed the flip
	/*
	flip_status sequence (30fps or slower):
	FLIP_STATE_FLIPPED
	(at vblank callback) => FLIP_STATE_V1
	(at vblank callback) =<release flip>=> FLIP_STATE_FLIP_RELEASED
	(at flip callback) => FLIP_STATE_FLIPPED
	*/
	/*
	flip_status sequence (60fps or slower):
	FLIP_STATE_FLIPPED
	(at vblank callback) =<release flip>=> FLIP_STATE_FLIP_RELEASED
	(at flip callback) => FLIP_STATE_FLIPPED
	*/
};

static volatile uint32_t *s_label_flip_control;		// pointer to the flip control label
static int s_flip_status=FLIP_STATE_FLIPPED;		// status variable to control flip
//--------------------------------------------------------------------------------------------------

// Releases the pending flip by setting the flip-control label to READY.
// Returns false (without writing) if the label is already READY — i.e. the RSX has not
// yet consumed the previous release.
static bool Gcm_ReleaseFlip(void)
{
	if (*s_label_flip_control==LABEL_FLIP_CONTROL_READY) {
		/* just in case rsx is running very slow somehow */
		/* and flip_control label is not updated even after the real flip */
		return false;
	}

	*s_label_flip_control=LABEL_FLIP_CONTROL_READY;
	return true;
}
void updateCursorPosition(const int pixelX, const int pixelY)
{
cellGcmSetCursorPosition(pixelX, pixelY);
int32_t result = cellGcmUpdateCursor();
if( result == CELL_GCM_ERROR_FAILURE)
{
// [dkorus] this case happens until we initialize the cursor
//Msg(" hardware cursor error: cellGcmInitCursor() has not been called\n");
}
else if( result == CELL_GCM_ERROR_INVALID_VALUE )
{
Msg(" hardware cursor error: cursor bitmap is not correctly set\n");
}
}
// Turns on the hardware cursor and forces an immediate update; each step logs on failure.
void enableCursor()
{
	const bool bEnabled = ( cellGcmSetCursorEnable() == CELL_OK );
	if ( !bEnabled )
	{
		Msg( "Hardware Cursor Error: trouble with enable\n" );
	}

	const bool bUpdated = ( cellGcmUpdateCursor() == CELL_OK );
	if ( !bUpdated )
	{
		Msg( "Hardware Cursor Error: trouble with update\n" );
	}
}
// Vblank interrupt callback. Two jobs:
//  1) refresh the hardware cursor position from the input system, and
//  2) drive the flip-release state machine: at 30Hz pacing, wait one extra vblank
//     (FLIPPED -> V1 -> release); at 60Hz, release on the first vblank after a flip.
static void Gcm_VblankCallbackFunction(const uint32_t head)
{
	// unused arg
	(void)head;

	int pixelX, pixelY;
	if ( g_pInputSystem )
	{
		bool cursorEnabled = g_pInputSystem->GetPS3CursorPos( pixelX, pixelY );
		if( cursorEnabled )
		{
			updateCursorPosition(pixelX,pixelY);
		}
	}

	switch (s_flip_status){
	case FLIP_STATE_FLIPPED:
		if (g_ps3gcmGlobalState.m_flipMode == 30){
			// 30Hz: skip this vblank; release on the next one
			s_flip_status=FLIP_STATE_V1;
		} else if (g_ps3gcmGlobalState.m_flipMode == 60){
			if (Gcm_ReleaseFlip()){
				s_flip_status=FLIP_STATE_FLIP_RELEASED;
			}
		}
		break;
	case FLIP_STATE_V1:
		// second vblank since the flip (30Hz pacing) — release now
		if (Gcm_ReleaseFlip()){
			s_flip_status=FLIP_STATE_FLIP_RELEASED;
		}
		break;
	case FLIP_STATE_FLIP_RELEASED:
		// waiting on the RSX; nothing to do until the flip callback fires
		break;
	default:
		assert(0);
	}
}
// Flip-complete callback from the RSX: advance FLIP_RELEASED -> FLIPPED.
// Any other state is left untouched.
static void Gcm_FlipCallbackFunction(const uint32_t head)
{
	(void)head;	// unused

	if ( s_flip_status == FLIP_STATE_FLIP_RELEASED )
	{
		s_flip_status = FLIP_STATE_FLIPPED;
	}
}
// initialize flip control state machine: HSYNC flip mode, frame counters, the
// flip-control label (starts in WAIT), and the flip/vblank interrupt handlers.
static void Gcm_InitFlipControl(void)
{
	cellGcmSetFlipMode( CELL_GCM_DISPLAY_HSYNC );

	g_ps3gcmGlobalState.m_frameNo = 0;
	g_ps3gcmGlobalState.m_finishIdx = 0;

	s_label_flip_control=cellGcmGetLabelAddress(GCM_LABEL_FLIP_CONTROL);
	*s_label_flip_control=LABEL_FLIP_CONTROL_WAIT;

	cellGcmSetFlipHandler(Gcm_FlipCallbackFunction);
	cellGcmSetVBlankHandler(Gcm_VblankCallbackFunction);
}
//--------------------------------------------------------------------------------------------------
// Beginscene, endscene and flip
//--------------------------------------------------------------------------------------------------

// Command-buffer usage stats: largest byte count written within a scene, and the write
// cursor captured at the most recent BeginScene.
uint32 gCmdBufferHighWater = 0;
uint32 gCmdBufferStart = 0;

// Marks the start of a scene: remember the command-buffer write position (for the
// high-water stat in EndScene) and forward to the draw state.
void CPs3gcmGlobalState::BeginScene()
{
	gCmdBufferStart = (uint32)gpGcmContext->current;

	gpGcmDrawState->BeginScene();
}
void CPs3gcmGlobalState::EndScene()
{
if ( (uint32)gpGcmContext->current > gCmdBufferStart )
{
uint32 bytes = (uint32)gpGcmContext->current - gCmdBufferStart;
if (bytes > gCmdBufferHighWater ) gCmdBufferHighWater = bytes;
}
gpGcmDrawState->EndScene();
}
// Plat_FloatTime() of the most recent SetFastFlip call; Flip() compares against it
// to decide when to force a full flip.
float g_fliptime = 0;

// Toggles fast-flip mode and timestamps the change.
void CPs3gcmGlobalState::SetFastFlip(bool onoff)
{
	g_fliptime = Plat_FloatTime();
	m_fastFlip = onoff;
}
// Reports the measured RSX frame time (ms) — implemented elsewhere.
extern void OnFrameTimestampAvailableRsx( float ms );

// Presents the current frame.
//
// Fast-flip path: when m_fastFlip is set and SetFastFlip was called within the last 50ms,
// skip the real flip — release the flip label, end the frame, flush, and jump straight to
// new-frame setup.
//
// Full path: end the frame, spin until the previous flip completes (servicing audio/server
// requests while waiting), stamp the GPU end-of-frame timestamp, optionally defrag VRAM,
// report last frame's GPU time, queue the new flip gated on the flip-control label, and
// reclaim local memory. Both paths finish by bumping the frame counter, stamping the
// start-of-frame timestamp and resetting RSX state.
void CPs3gcmGlobalState::Flip()
{
	// Service system utility callbacks
	cellSysutilCheckCallback();

	if(m_fastFlip)
	{
		Gcm_ReleaseFlip();

		float time = Plat_FloatTime();
		if ( (time - g_fliptime) > 0.05) goto fullflip;

		// Just end the frame, no point in flipping here...
		gpGcmDrawState->EndFrame();
		GCM_FUNC( cellGcmFlush );
		goto newframe;
	}

fullflip:

	int idx, startIdx, endIdx;

	//--------------------------------------------------------------------------------------------------
	// Ensure any buffered state, copies etc... goes to GPU
	//--------------------------------------------------------------------------------------------------

	gpGcmDrawState->EndFrame();

	//--------------------------------------------------------------------------------------------------
	// Wait for previous frame Flip
	//--------------------------------------------------------------------------------------------------

	while (cellGcmGetFlipStatus()!=0){
		// Keep servicing audio/server requests while spinning
		g_pGcmSharedData->CheckForAudioRequest();
		g_pGcmSharedData->CheckForServerRequest();
		sys_timer_usleep(300);
	}

	// Insert end of gpu timestamp (double-buffered by frame parity)
	idx = m_frameNo&1;
	endIdx = GCM_REPORT_TIMESTAMP_FRAME_FIRST + (idx*2) + 1;
	GCM_FUNC( cellGcmSetTimeStamp, endIdx );

	//--------------------------------------------------------------------------------------------------
	// If requested, lets defrag VRAM
	//--------------------------------------------------------------------------------------------------

	if (g_pGcmSharedData->m_bDeFrag)
	{
		g_pGcmSharedData->m_bDeFrag = 0;
		extern void Ps3gcmLocalMemoryAllocator_CompactWithReason( char const *szReason );
		Ps3gcmLocalMemoryAllocator_CompactWithReason( "End of Round" );
	}

	//--------------------------------------------------------------------------------------------------
	// Get Timestamps (previous frame's start/end pair -> GPU frame time)
	//--------------------------------------------------------------------------------------------------

	if (m_frameNo)
	{
		idx = ((m_frameNo-1) & 1);
		startIdx = GCM_REPORT_TIMESTAMP_FRAME_FIRST + (idx*2);
		endIdx = startIdx+1;

		uint64 uiStartTimestamp = cellGcmGetTimeStamp( startIdx );
		uint64 uiEndTimestamp = cellGcmGetTimeStamp( endIdx );
		uint64 uiRsxTimeInNanoSeconds = uiEndTimestamp - uiStartTimestamp;
		OnFrameTimestampAvailableRsx( uiRsxTimeInNanoSeconds / 1000000.0f );
	}

	//--------------------------------------------------------------------------------------------------
	// Insert new flip command and flush gpu
	//--------------------------------------------------------------------------------------------------

	// reset FlipStatus = 1
	cellGcmResetFlipStatus();

	// queue Flip command; the RSX waits on the flip-control label so the vblank
	// handler paces us to m_flipMode
	GCM_FUNC( cellGcmSetFlipWithWaitLabel, m_display.surfaceFlipIdx, GCM_LABEL_FLIP_CONTROL, LABEL_FLIP_CONTROL_READY);

	m_display.Flip();

	GCM_FUNC( cellGcmSetWriteCommandLabel, GCM_LABEL_FLIP_CONTROL, LABEL_FLIP_CONTROL_WAIT);
	GCM_FUNC( cellGcmSetWaitFlip );
	GCM_FUNC( cellGcmFlush );

	extern void Ps3gcmLocalMemoryAllocator_Reclaim();
	Ps3gcmLocalMemoryAllocator_Reclaim();

	//--------------------------------------------------------------------------------------------------
	// Start a new frame
	//--------------------------------------------------------------------------------------------------

newframe:

	m_frameNo ++;

	// Insert start of gpu timestamp
	idx = m_frameNo&1;
	startIdx = GCM_REPORT_TIMESTAMP_FRAME_FIRST + (idx*2);
	GCM_FUNC( cellGcmSetTimeStamp, startIdx );

	// Put RSX into known state for start of frame
	gpGcmDrawState->ResetRsxState();

	// Moved from DX present()
	GCM_FUNC( cellGcmSetInvalidateVertexCache );
}
//--------------------------------------------------------------------------------------------------
// Buffer management
//--------------------------------------------------------------------------------------------------

// Factory: creates a CPs3gcmBuffer backed by a local-memory block of the requested
// size and allocation type. Caller releases via Release().
CPs3gcmBuffer * CPs3gcmBuffer::New( uint32 uiSize, CPs3gcmAllocationType_t uType )
{
	CPs3gcmBuffer *pBuffer = new CPs3gcmBuffer;
	pBuffer->m_lmBlock.Alloc( uType, uiSize );
	return pBuffer;
}
// Releases the buffer: frees the local-memory block (the allocator defers the actual
// reclaim until the RSX is done with it) and destroys this object.
void CPs3gcmBuffer::Release()
{
	// Wait for RSX to finish using the buffer memory
	// and free it later
	m_lmBlock.Free();
	delete this;
}

View File

@@ -0,0 +1,272 @@
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
// Gcm renderer state and util functions
//
//==================================================================================================
#ifndef INCLUDED_GCMSTATE_H
#define INCLUDED_GCMSTATE_H
#ifndef SPU
#include "tier0/platform.h"
#include "tier0/dbg.h"
#include "cell\gcm.h"
#include "gcmconfig.h"
#include "ps3gcmmemory.h"
#include "dxabstract_def.h"
#include "spumgr_ppu.h"
#else
#include "spumgr_spu.h"
#endif
//--------------------------------------------------------------------------------------------------
// Misc
//--------------------------------------------------------------------------------------------------
// Returns the smaller of the two values (b when they compare equal).
template <typename T>
inline T Min( T a, T b )
{
	if ( a < b )
	{
		return a;
	}
	return b;
}
// Returns the larger of the two values (b when they compare equal).
template <typename T>
inline T Max( T a, T b )
{
	if ( a > b )
	{
		return a;
	}
	return b;
}
// Exchanges the contents of a and b through a temporary copy.
template <typename T>
inline void Swap( T& a , T & b )
{
	T tmp = a;
	a = b;
	b = tmp;
}
//--------------------------------------------------------------------------------------------------
// Literals
//--------------------------------------------------------------------------------------------------
// IO Memory (page size is 1MB, so make these add up to a whole number of MB)
#define GCM_MAINPOOLSIZE (0 * 0x100000) // IO memory for main pool
#define GCM_DEFCMDBUFFSIZE (1 * 0x200000) // Default command buff (must be pow 2)
#define GCM_CALLCMDBUFFSIZE (2 * 0x10000) // 256 K of cmd buffer to call to
// Used for DrawprimUP
#define GCM_CALLCMDSEGSIZE 0x8000 // 32K segmentation
#define GCM_PATCHBUFFSIZE ((2 * 0x100000) - GCM_CALLCMDBUFFSIZE)
#define GCM_PATCHSEGSIZE 0x8000
#define GCM_IOSIZE (GCM_MAINPOOLSIZE + GCM_DEFCMDBUFFSIZE + GCM_CALLCMDBUFFSIZE + GCM_PATCHBUFFSIZE)
//--------------------------------------------------------------------------------------------------
// Display Structure
//--------------------------------------------------------------------------------------------------

// Double-buffered scanout surfaces plus the depth buffer, and the flip index bookkeeping.
struct CPs3gcmDisplay
{
	uint32 surfaceFlipIdx; // which scanout color buffer will be presented with flip

	enum EnumConst_t { SURFACE_COUNT = 2 };
	CPs3gcmLocalMemoryBlockSystemGlobal surfaceColor[SURFACE_COUNT];	// scanout color buffers for double-buffering
	// (need one more to avoid overwriting old buffer)
	CPs3gcmLocalMemoryBlockSystemGlobal surfaceDepth;					// depth buffer

	// Advance the flip index to the next scanout surface.
	void Flip()
	{
		surfaceFlipIdx = NextSurfaceIndex();
	}

	// Index of the surface nFrame flips ahead of the current one (mod SURFACE_COUNT).
	uint NextSurfaceIndex( int nFrame = 1 )const
	{
		return ( surfaceFlipIdx + nFrame ) % SURFACE_COUNT;
	}

	// Index of the surface nFrame flips behind the current one. The +1 bias of
	// 1000000 * SURFACE_COUNT keeps the dividend non-negative before the modulo.
	uint PrevSurfaceIndex( int nFrame )const
	{
		int nResult = int( surfaceFlipIdx + 1000000 * SURFACE_COUNT - nFrame ) % int( SURFACE_COUNT );
		Assert( uint( nResult ) < SURFACE_COUNT ); // if this is negative, it means we did ( ( something ) mod 2 ) mod 3, which makes no sense in this context
		return uint( nResult );
	}
};
//--------------------------------------------------------------------------------------------------
// Global GCM state class
//--------------------------------------------------------------------------------------------------

// Central renderer state: memory layout (RSX local + mapped IO), command/call/patch
// buffers, the SPU draw task handle, the empty pixel shader, flip pacing and display
// configuration. A single instance (g_ps3gcmGlobalState) exists.
struct CPs3gcmGlobalState
{
	//--------------------------------------------------------------------------------------------------
	// Memory
	// RSX Local, plus one block of memory mapped into RSX (IO mem)
	// Main memory pool is within the IO mem and is used for textures until it fills...
	//--------------------------------------------------------------------------------------------------

	// RSX local memory
	void * m_pLocalBaseAddress;			// RSX Local Memory Base Address
	uint32 m_nLocalBaseOffset;			// cellGcmAddressToOffset( m_pLocalBaseAddress )
	uint32 m_nLocalSize;				// RSX Local Memory Size

	// IO memory mapped into RSX
	void * m_pIoAddress;				// RSX IO buffer, base address
	uint32 m_nIoSize;					// RSX IO total size [including CMD buffer]
	uint32 m_nIoOffsetDelta;			// add this to EA to get Io Offset

	// Call Cmd Buffer (ring of GCM_CALLCMDSEGSIZE segments used by DrawPrimitiveUP)
	void* m_pCallCmdBuffer;
	uint32 m_nCallCmdBufferoffset;
	uint32 m_nCallWritePos;				// Current posn (offset)
	uint32 m_nCallReadSegment;

	// main memory pool buffer
	void * m_pRsxMainMemoryPoolBuffer;
	uint32 m_nRsxMainMemoryPoolBufferSize;

	// Pointer to the draw states
	uint32 m_eaDrawStates;

	//--------------------------------------------------------------------------------------------------
	// SPU Task
	//--------------------------------------------------------------------------------------------------

	SpuTaskHandle m_spuHandle;

	//--------------------------------------------------------------------------------------------------
	// Patched Shaders
	//--------------------------------------------------------------------------------------------------

	uint8* m_pPatchBuff;
	uint32 m_nPatchIdx;					// Write index for this frames patch buffer
	uint32 m_nPatchReadSeg;

	//--------------------------------------------------------------------------------------------------
	// Empty pixel shader
	//--------------------------------------------------------------------------------------------------

	CPs3gcmLocalMemoryBlock m_pShaderPsEmptyBuffer;
	CgBinaryProgram *m_pShaderPsEmpty;	// empty pixel shader
	uint32 m_nPsEmptyShaderControl0;
	uint32 m_nPsEmptyAttributeInputMask;

	//--------------------------------------------------------------------------------------------------
	// Flip data
	//--------------------------------------------------------------------------------------------------

	uint32 m_flipMode;					// Holds 30 or 60
	uint32 m_frameNo;
	uint32 m_finishIdx;
	bool m_fastFlip;

	//--------------------------------------------------------------------------------------------------
	// Display
	//--------------------------------------------------------------------------------------------------

	// Display size, aspect, pitch
	uint16 m_nRenderSize[2];			// width & height of the render buffer
	float m_flRenderAspect;				// aspect ratio of the output device
	uint32 m_nSurfaceRenderPitch;

	CPs3gcmDisplay m_display;

	//--------------------------------------------------------------------------------------------------
	// Methods
	//--------------------------------------------------------------------------------------------------

public:
	int32 Init();
	void Shutdown();

	void BeginScene();
	void EndScene();
	void Flip();
	void SetFastFlip(bool onoff);

	static int32_t CmdBufferFull(struct CellGcmContextData * pGcmContext, uint32_t size);

	// DrawPrimUP puts a drawprimup call into the call buffer, with a label and RET.
	// It's called from the gcmdrawstate which then sends a drawcall packet to the SPU
	uint32 DrawPrimitiveUP(D3DPRIMITIVETYPE nPrimitiveType,UINT nPrimitiveCount,
						   CONST void *pVertexStreamZeroData, UINT nVertexStreamZeroStride );

	// GetRenderSurfaceBytes Note:
	// Height alignment must be 32 for tiled surfaces on RSX
	// 128 for Edge Post MLAA
	// 64 for Edge Post MLAA with EDGE_POST_MLAA_MODE_TRANSPOSE_64 flag set
	inline uint GetRenderSurfaceBytes( uint nHeightAlignment = 32 ) const ;

private:

	int InitGcm();
	int InitVideo();

	void CreateRsxBuffers();			// Display buffers and default allocated RTs etc..
	void CreateIoBuffers();				// Allocs IO memory (mapped in Initgcm)
};
//--------------------------------------------------------------------------------------------------
// Inlines
//--------------------------------------------------------------------------------------------------

// Size in bytes of one render surface: pitch times the surface height rounded up to
// nHeightAlignment lines (see the alignment notes on the declaration above).
inline uint CPs3gcmGlobalState::GetRenderSurfaceBytes( uint nHeightAlignment) const
{
	return m_nSurfaceRenderPitch * AlignValue( m_nRenderSize[1], nHeightAlignment );
}
//--------------------------------------------------------------------------------------------------
// Extern Globals
//--------------------------------------------------------------------------------------------------

// Defined in gcmstate.cpp: primary/call GCM contexts and the global renderer state.
extern CellGcmContextData gGcmContext;
extern CellGcmContextData* gpGcmContext;
extern CPs3gcmGlobalState g_ps3gcmGlobalState;
extern CellGcmContextData gCallContext;
extern CellGcmContextData* gpCallContext;
//--------------------------------------------------------------------------------------------------
// Memory block funcs that need access to g_ps3gcmGlobalState
//--------------------------------------------------------------------------------------------------

#ifndef SPU

// PPU-visible address of a block living in RSX local memory: its local-memory offset
// rebased onto the mapped local-memory base address.
inline char * CPs3gcmLocalMemoryBlock::DataInLocalMemory() const
{
	Assert( IsLocalMemory() );
	return
		( m_nLocalMemoryOffset - g_ps3gcmGlobalState.m_nLocalBaseOffset ) +
		( char * ) g_ps3gcmGlobalState.m_pLocalBaseAddress;
}
// PPU-visible address of a block living in RSX-mapped main (IO) memory:
// its offset added to the IO base address.
inline char * CPs3gcmLocalMemoryBlock::DataInMainMemory() const
{
	Assert( !IsLocalMemory() && IsRsxMappedMemory() );
	return
		m_nLocalMemoryOffset +
		( ( char * ) g_ps3gcmGlobalState.m_pIoAddress );
}
// Address of a block in plain heap memory: for malloc-pool blocks, the "offset"
// field actually stores the pointer value itself.
inline char * CPs3gcmLocalMemoryBlock::DataInMallocMemory() const
{
	Assert( !IsLocalMemory() && !IsRsxMappedMemory() );
	return ( char * ) m_nLocalMemoryOffset;
}
// Dispatches to the correct accessor based on the block's allocation pool
// (local memory by default, main/IO memory, or malloc'd heap memory).
inline char * CPs3gcmLocalMemoryBlock::DataInAnyMemory() const
{
	switch ( PS3GCMALLOCATIONPOOL( m_uType ) )
	{
	default: return DataInLocalMemory();
	case kGcmAllocPoolMainMemory: return DataInMainMemory();
	case kGcmAllocPoolMallocMemory: return DataInMallocMemory();
	}
}
#endif
#endif // INCLUDED_GCMSTATE_H

View File

@@ -0,0 +1,515 @@
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
//
//
//==================================================================================================
#include "tier0/platform.h"
#include "tier0/dbg.h"
#include "tier1/strtools.h"
#include "tier1/utlbuffer.h"
#include "utlmap.h"
#include "ps3gcmmemory.h"
#include "gcmstate.h"
#include "bitmap/imageformat_declarations.h"
#include "gcmtexture.h"
#include "memdbgon.h"
#ifdef _CERT
#define Debugger() ((void)0)
#else
#define Debugger() DebuggerBreak()
#endif
//--------------------------------------------------------------------------------------------------
// Texture Layouts
//--------------------------------------------------------------------------------------------------
#ifdef _CERT
#define GLMTEX_FMT_DESC( x )
#else
#define GLMTEX_FMT_DESC( x ) x ,
#endif
#define CELL_GCM_REMAP_MODE_OIO(order, inputARGB, outputARGB) \
(((order)<<16)|((inputARGB))|((outputARGB)<<8))
#define REMAPO( x ) CELL_GCM_TEXTURE_REMAP_ORDER_X##x##XY
#define REMAP4(a,r,g,b) (((a)<<0)|((r)<<2)|((g)<<4)|((b)<<6))
#define REMAP_ARGB REMAP4( CELL_GCM_TEXTURE_REMAP_FROM_A, CELL_GCM_TEXTURE_REMAP_FROM_R, CELL_GCM_TEXTURE_REMAP_FROM_G, CELL_GCM_TEXTURE_REMAP_FROM_B )
#define REMAP_4 REMAP4( CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP )
#define REMAP_13 REMAP4( CELL_GCM_TEXTURE_REMAP_ONE, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP )
#define REMAP_4X(x) REMAP4( x, x, x, x )
#define REMAP_13X(y, x) REMAP4( y, x, x, x )
#define REMAP_ALL_DEFAULT CELL_GCM_REMAP_MODE_OIO( REMAPO(Y), REMAP_ARGB, REMAP_4 )
#define REMAP_ALL_DEFAULT_X CELL_GCM_REMAP_MODE_OIO( REMAPO(X), REMAP_ARGB, REMAP_4 )
#define CAP( x ) CPs3gcmTextureLayout::Format_t::kCap##x
// Texture format table, indexed by CPs3gcmTextureLayout::m_nFormat.
// The first PS3_TEX_CANONICAL_FORMAT_COUNT entries are the canonical formats;
// the remaining slots are filled at runtime with per-texture modified copies
// (tiled-pitch overrides) by CPs3gcmTextureLayout::New.
CPs3gcmTextureLayout::Format_t g_ps3texFormats[PS3_TEX_MAX_FORMAT_COUNT] =
{
// Initializer order per entry (matches Format_t field order):
// summ-name d3d-format
// gcmRemap
// gcmPitchPer4X (pitch contribution per 4 pixels of width)
// gcmFormat
// gcmFlags (caps)
{ GLMTEX_FMT_DESC("_D16") D3DFMT_D16,
REMAP_ALL_DEFAULT,
8,
CELL_GCM_TEXTURE_DEPTH16,
0 },
{ GLMTEX_FMT_DESC("_D24X8") D3DFMT_D24X8,
REMAP_ALL_DEFAULT,
16,
CELL_GCM_TEXTURE_DEPTH24_D8,
0 },
{ GLMTEX_FMT_DESC("_D24S8") D3DFMT_D24S8,
REMAP_ALL_DEFAULT,
16,
CELL_GCM_TEXTURE_DEPTH24_D8,
0 },
{ GLMTEX_FMT_DESC("_A8R8G8B8") D3DFMT_A8R8G8B8,
REMAP_ALL_DEFAULT,
16,
CELL_GCM_TEXTURE_A8R8G8B8,
CAP(SRGB) },
{ GLMTEX_FMT_DESC("_X8R8G8B8") D3DFMT_X8R8G8B8,
REMAP_ALL_DEFAULT,
16,
CELL_GCM_TEXTURE_A8R8G8B8,
CAP(SRGB) },
// X1R5G5B5 is stored in a R5G6B5 GCM texture (no direct GCM equivalent)
{ GLMTEX_FMT_DESC("_X1R5G5B5") D3DFMT_X1R5G5B5,
CELL_GCM_REMAP_MODE_OIO( REMAPO(X), REMAP_ARGB, REMAP_13 ),
8,
CELL_GCM_TEXTURE_R5G6B5,
0 },
{ GLMTEX_FMT_DESC("_A1R5G5B5") D3DFMT_A1R5G5B5,
REMAP_ALL_DEFAULT_X,
8,
CELL_GCM_TEXTURE_A1R5G5B5,
0 },
// Luminance formats replicate the single B8/G8B8 channels via the remap mask
{ GLMTEX_FMT_DESC("_L8") D3DFMT_L8,
CELL_GCM_REMAP_MODE_OIO( REMAPO(Y), REMAP_4X(CELL_GCM_TEXTURE_REMAP_FROM_B), REMAP_13 ),
4,
CELL_GCM_TEXTURE_B8,
0 },
{ GLMTEX_FMT_DESC("_A8L8") D3DFMT_A8L8,
CELL_GCM_REMAP_MODE_OIO( REMAPO(Y), REMAP_13X( CELL_GCM_TEXTURE_REMAP_FROM_G, CELL_GCM_TEXTURE_REMAP_FROM_B), REMAP_4 ),
8,
CELL_GCM_TEXTURE_G8B8,
0 },
{ GLMTEX_FMT_DESC("_DXT1") D3DFMT_DXT1,
CELL_GCM_REMAP_MODE_OIO( REMAPO(Y), REMAP_ARGB, REMAP_13 ),
8,
CELL_GCM_TEXTURE_COMPRESSED_DXT1,
CAP(SRGB) | CAP(4xBlocks) },
{ GLMTEX_FMT_DESC("_DXT3") D3DFMT_DXT3,
REMAP_ALL_DEFAULT,
16,
CELL_GCM_TEXTURE_COMPRESSED_DXT23,
CAP(SRGB) | CAP(4xBlocks) },
{ GLMTEX_FMT_DESC("_DXT5") D3DFMT_DXT5,
REMAP_ALL_DEFAULT,
16,
CELL_GCM_TEXTURE_COMPRESSED_DXT45,
CAP(SRGB) | CAP(4xBlocks) },
{ GLMTEX_FMT_DESC("_A16B16G16R16F") D3DFMT_A16B16G16R16F,
REMAP_ALL_DEFAULT_X,
32,
CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT,
0 },
// NOTE: A16B16G16R16 (integer) is widened to the 32-bit float GCM format (pitch 64)
{ GLMTEX_FMT_DESC("_A16B16G16R16") D3DFMT_A16B16G16R16,
REMAP_ALL_DEFAULT_X,
64,
CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT,
0 },
{ GLMTEX_FMT_DESC("_A32B32G32R32F") D3DFMT_A32B32G32R32F,
REMAP_ALL_DEFAULT_X,
64,
CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT,
0 },
// 24-bit RGB is expanded to A8R8G8B8 with alpha forced to one via the remap
{ GLMTEX_FMT_DESC("_R8G8B8") D3DFMT_R8G8B8,
CELL_GCM_REMAP_MODE_OIO( REMAPO(Y),
REMAP4( CELL_GCM_TEXTURE_REMAP_FROM_B, CELL_GCM_TEXTURE_REMAP_FROM_A, CELL_GCM_TEXTURE_REMAP_FROM_R, CELL_GCM_TEXTURE_REMAP_FROM_G ),
REMAP_13 ),
16,
CELL_GCM_TEXTURE_A8R8G8B8,
CAP(SRGB) },
{ GLMTEX_FMT_DESC("_A8") D3DFMT_A8,
CELL_GCM_REMAP_MODE_OIO( REMAPO(Y),
REMAP4( CELL_GCM_TEXTURE_REMAP_FROM_B, CELL_GCM_TEXTURE_REMAP_FROM_R, CELL_GCM_TEXTURE_REMAP_FROM_B, CELL_GCM_TEXTURE_REMAP_FROM_B ),
REMAP_13X( CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_ZERO ) ),
4,
CELL_GCM_TEXTURE_B8,
0 },
// NOTE: R5G6B5 is expanded to A8R8G8B8 (not stored as native 16bpp)
{ GLMTEX_FMT_DESC("_R5G6B5") D3DFMT_R5G6B5,
CELL_GCM_REMAP_MODE_OIO( REMAPO(Y),
REMAP4( CELL_GCM_TEXTURE_REMAP_FROM_B, CELL_GCM_TEXTURE_REMAP_FROM_A, CELL_GCM_TEXTURE_REMAP_FROM_R, CELL_GCM_TEXTURE_REMAP_FROM_G ),
REMAP_13 ),
16,
CELL_GCM_TEXTURE_A8R8G8B8,
CAP(SRGB) },
{ GLMTEX_FMT_DESC("_Q8W8V8U8") D3DFMT_Q8W8V8U8,
REMAP_ALL_DEFAULT,
16,
CELL_GCM_TEXTURE_A8R8G8B8,
CAP(SRGB) },
};
// Runtime count: starts at the canonical count and grows as tiled layouts
// append modified format copies (see CPs3gcmTextureLayout::New).
uint g_nPs3texFormatCount = PS3_TEX_CANONICAL_FORMAT_COUNT;
#undef CAP
#undef GLMTEX_FMT_DESC
static bool Ps3texLayoutLessFunc( CPs3gcmTextureLayout::Key_t const &a, CPs3gcmTextureLayout::Key_t const &b )
{
	// Strict weak ordering over the raw key bytes; an arbitrary but consistent
	// total order is all the layout cache map needs.
	int nDelta = memcmp( &a, &b, sizeof( CPs3gcmTextureLayout::Key_t ) );
	return nDelta < 0;
}
static CUtlMap< CPs3gcmTextureLayout::Key_t, CPs3gcmTextureLayout const * > s_ps3texLayouts( Ps3texLayoutLessFunc );
CPs3gcmTextureLayout const * CPs3gcmTextureLayout::New( Key_t const &k )
{
	// Look up 'key' in the layout cache: on a hit just bump the refcount and
	// return the cached layout. On a miss, derive the complete layout (slice
	// table, storage size, allocation pool) from the key, cache it with a
	// refcount of 1, and return it.
	unsigned short index = s_ps3texLayouts.Find( k );
	if ( index != s_ps3texLayouts.InvalidIndex() )
	{
		CPs3gcmTextureLayout const *layout = s_ps3texLayouts[ index ];
		++ layout->m_refCount;
		return layout;
	}

	// Need to generate complete information about the texture layout
	uint8 nMips = ( k.m_texFlags & kfMip ) ? k.m_nActualMipCount : 1;
	uint8 nFaces = ( k.m_texFlags & kfTypeCubeMap ) ? 6 : 1;
	uint32 nSlices = nMips * nFaces;

	// Allocate layout memory (header plus one Slice_t per face/mip pair)
	size_t numLayoutBytes = sizeof( CPs3gcmTextureLayout ) + nSlices * sizeof( Slice_t );
	CPs3gcmTextureLayout *layout = ( CPs3gcmTextureLayout * ) MemAlloc_AllocAligned( numLayoutBytes, 16 );
	memset( layout, 0, numLayoutBytes );
	memcpy( &layout->m_key, &k, sizeof( Key_t ) );
	layout->m_refCount = 1;

	// Find the format descriptor (index into g_ps3texFormats)
	for ( int j = 0; j < PS3_TEX_CANONICAL_FORMAT_COUNT; ++ j )
	{
		if ( g_ps3texFormats[j].m_d3dFormat == k.m_texFormat )
		{
			layout->m_nFormat = j;
			break;
		}
		// Reaching the last entry without a match means the format is unsupported
		Assert( j != PS3_TEX_CANONICAL_FORMAT_COUNT - 1 );
	}
	layout->m_mipCount = nMips;

	//
	// Slices: compute per-face/per-mip offsets and sizes
	//
	bool bSwizzled = layout->IsSwizzled();
	size_t fmtPitch = layout->GetFormatPtr()->m_gcmPitchPer4X;
	size_t fmtPitchBlock = ( layout->GetFormatPtr()->m_gcmCaps & CPs3gcmTextureLayout::Format_t::kCap4xBlocks ) ? 16 : 4;
	size_t numDataBytes = 0;
	Slice_t *pSlice = &layout->m_slices[0];
	for ( int face = 0; face < nFaces; ++ face )
	{
		// For cubemaps every next face in swizzled addressing
		// must be aligned on 128-byte boundary
		if ( bSwizzled )
		{
			numDataBytes = ( numDataBytes + 127 ) & ~127;
		}
		for ( int mip = 0; mip < nMips; ++ mip, ++ pSlice )
		{
			// Mip dimensions, clamped so they never reach zero
			for ( int j = 0; j < ARRAYSIZE( k.m_size ); ++ j )
			{
				pSlice->m_size[j] = k.m_size[j] >> mip;
				pSlice->m_size[j] = MAX( pSlice->m_size[j], 1 );
			}
			pSlice->m_storageOffset = numDataBytes;

			size_t numTexels;
			// For linear layout textures every mip row must be padded to the
			// width of the original highest level mip so that the pitch was
			// the same for every mip
			if ( bSwizzled )
				numTexels = ( pSlice->m_size[0] * pSlice->m_size[1] * pSlice->m_size[2] );
			else
				numTexels = ( k.m_size[0] * pSlice->m_size[1] * pSlice->m_size[2] );

			size_t numBytes = ( numTexels * fmtPitch ) / fmtPitchBlock;
			if ( layout->GetFormatPtr()->m_gcmCaps & CPs3gcmTextureLayout::Format_t::kCap4xBlocks )
			{
				// Ensure the size of the smallest mipmap levels of DXT1/3/5 textures (the 1x1 and 2x2 mips) is accurately computed.
				numBytes = MAX( numBytes, fmtPitch );
			}
			pSlice->m_storageSize = MAX( numBytes, 1 );

			numDataBytes += pSlice->m_storageSize;
		}
	}
	// Make the total size 128-byte aligned
	// Realistically it is required only for depth textures
	numDataBytes = ( numDataBytes + 127 ) & ~127;

	//
	// Tiled and ZCull memory adjustments
	//
	layout->m_gcmAllocType = GCM_MAINPOOLSIZE ? kAllocPs3gcmTextureData0 : kAllocPs3gcmTextureData;
	if ( layout->IsTiledMemory() )
	{
		// Tiled targets get a private copy of their format descriptor with the
		// pitch overridden by the tiled pitch, appended past the canonical entries.
		if ( g_nPs3texFormatCount >= PS3_TEX_MAX_FORMAT_COUNT )
		{
			Error("Modified ps3 format array overflow. Increase PS3_TEX_MAX_FORMAT_COUNT appropriately and recompile\n");
		}
		Format_t *pModifiedFormat = &g_ps3texFormats[g_nPs3texFormatCount];
		V_memcpy( pModifiedFormat, layout->GetFormatPtr(), sizeof( Format_t ) );
		layout->m_nFormat = g_nPs3texFormatCount;
		g_nPs3texFormatCount ++;

		if ( k.m_texFlags & kfTypeDepthStencil )
		{
			//
			// Tiled Zcull Surface (zcull areas must be 64-pixel aligned)
			//
			uint32 zcullSize[2] = { AlignValue( k.m_size[0], 64 ), AlignValue( k.m_size[1], 64 ) };

			uint32 nDepthPitch;
			if ( k.m_texFormat == D3DFMT_D16 )
				nDepthPitch = cellGcmGetTiledPitchSize( zcullSize[0] * 2 );
			else
				nDepthPitch = cellGcmGetTiledPitchSize( zcullSize[0] * 4 );
			pModifiedFormat->m_gcmPitchPer4X = nDepthPitch;

			uint32 uDepthBufferSize32bpp = nDepthPitch * zcullSize[1];
			uDepthBufferSize32bpp = AlignValue( uDepthBufferSize32bpp, PS3GCMALLOCATIONALIGN( kAllocPs3gcmDepthBuffer ) );
			Assert( uDepthBufferSize32bpp >= numDataBytes );
			numDataBytes = uDepthBufferSize32bpp;

			layout->m_gcmAllocType = kAllocPs3gcmDepthBuffer;
		}
		else
		{
			//
			// Tiled Color Surface
			//
			uint32 nTiledPitch = cellGcmGetTiledPitchSize( k.m_size[0] * layout->GetFormatPtr()->m_gcmPitchPer4X / 4 );
			pModifiedFormat->m_gcmPitchPer4X = nTiledPitch;

			// We Don't allocate any 512x512 RTs (they are used only when in PAL576i which can use the FB mem pool)
			/*if ( k.m_size[0] == 512 && k.m_size[1] == 512 && k.m_size[2] == 1 )
				layout->m_gcmAllocType = kAllocPs3gcmColorBuffer512;
			else*/
			if ( k.m_size[0] == g_ps3gcmGlobalState.m_nRenderSize[0] && k.m_size[1] == g_ps3gcmGlobalState.m_nRenderSize[1] && k.m_size[2] == 1 )
				layout->m_gcmAllocType = kAllocPs3gcmColorBufferFB;
			else if ( k.m_size[0] == g_ps3gcmGlobalState.m_nRenderSize[0]/4 && k.m_size[1] == g_ps3gcmGlobalState.m_nRenderSize[1]/4 && k.m_size[2] == 1 )
				layout->m_gcmAllocType = kAllocPs3gcmColorBufferFBQ;
			else
				layout->m_gcmAllocType = kAllocPs3gcmColorBufferMisc;

			uint32 uRenderSize = nTiledPitch * AlignValue( k.m_size[1], 32 ); // 32-line vertical alignment required in local memory
			if ( layout->m_gcmAllocType == kAllocPs3gcmColorBufferMisc )
				uRenderSize = AlignValue( uRenderSize, PS3GCMALLOCATIONALIGN( kAllocPs3gcmColorBufferMisc ) );
			Assert( uRenderSize >= numDataBytes );
			numDataBytes = uRenderSize;
		}
	}
	layout->m_storageTotalSize = numDataBytes;

	//
	// Finished creating the layout information
	//
#ifndef _CERT
	// generate summary
	// "target, format, +/- mips, base size"
	char scratch[1024];
	// BUG FIX: this used to read `char *targetname = targetname = "2D ";` —
	// a redundant double-assignment typo; a plain initialization is intended.
	const char *targetname = "2D ";
	if ( layout->IsVolumeTex() )
		targetname = "3D ";
	if ( layout->IsCubeMap() )
		targetname = "CUBE";
	// Use bounded V_snprintf instead of sprintf to protect the stack buffer
	V_snprintf( scratch, sizeof( scratch ), "[%s %s %dx%dx%d mips=%d slices=%d flags=%02X%s]",
		targetname,
		layout->GetFormatPtr()->m_formatSummary,
		layout->m_key.m_size[0], layout->m_key.m_size[1], layout->m_key.m_size[2],
		nMips,
		nSlices,
		layout->m_key.m_texFlags,
		( layout->m_key.m_texFlags & kfSrgbEnabled ) ? " SRGB" : ""
		);
	layout->m_layoutSummary = strdup( scratch );
#endif

	// then insert into map. disregard returned index.
	s_ps3texLayouts.Insert( k, layout );
	return layout;
}
void CPs3gcmTextureLayout::Release() const
{
	// Drop one reference. The layout object itself is intentionally never
	// destroyed: it stays in the layout map so an identical key can reuse it.
	m_refCount --;
	Assert( m_refCount >= 0 );
}
//////////////////////////////////////////////////////////////////////////
//
// Texture management
//
CPs3gcmTexture * CPs3gcmTexture::New( CPs3gcmTextureLayout::Key_t const &key )
{
	// Build (or fetch from the cache) the layout describing this texture
	CPs3gcmTextureLayout const *pNewLayout = CPs3gcmTextureLayout::New( key );
	if ( !pNewLayout )
	{
		Debugger();
		return NULL;
	}

	// Zero-initialized texture header; clearing it also resets the embedded
	// CPs3gcmLocalMemoryBlock so Size() reports "not allocated yet"
	CPs3gcmTexture *pTexture = ( CPs3gcmTexture * ) MemAlloc_AllocAligned( sizeof( CPs3gcmTexture ), 16 );
	memset( pTexture, 0, sizeof( CPs3gcmTexture ) );
	pTexture->m_layout = pNewLayout;

	if ( key.m_texFlags & CPs3gcmTextureLayout::kfNoD3DMemory )
	{
		CPs3gcmAllocationType_t eAllocType = pNewLayout->m_gcmAllocType;
		bool bRenderTargetAlloc = ( eAllocType == kAllocPs3gcmDepthBuffer ) || ( eAllocType == kAllocPs3gcmColorBufferMisc );
		if ( !bRenderTargetAlloc )
		{
			// Early-out, storage will be allocated later (via IDirect3DDevice9::AllocateTextureStorage)
			return pTexture;
		}
		// Render-target storage cannot be deferred; fall through and allocate now
		Assert( 0 );
		Warning( "ERROR: (CPs3gcmTexture::New) depth/colour buffers should not be marked with kfNoD3DMemory!\n" );
	}

	pTexture->Allocate();
	return pTexture;
}
void CPs3gcmTexture::Release()
{
	// Free the GPU memory block first — the allocator defers the actual
	// release until RSX has finished using the memory — then drop our
	// reference on the shared layout and free the header itself.
	if ( m_lmBlock.Size() != 0 )
	{
		m_lmBlock.Free();
	}
	m_layout->Release();
	MemAlloc_FreeAligned( this );
}
bool CPs3gcmTexture::Allocate()
{
if ( m_lmBlock.Size() )
{
// Already allocated!
Assert( 0 );
Warning( "ERROR: CPs3gcmTexture::Allocate called twice!\n" );
return true;
}
CPs3gcmAllocationType_t uAllocationType = m_layout->m_gcmAllocType;
const CPs3gcmTextureLayout::Key_t & key = m_layout->m_key;
// if kAllocPs3gcmTextureData0 (main memory) fails try kAllocPs3gcmTextureData
if (!m_lmBlock.Alloc( uAllocationType, m_layout->m_storageTotalSize ) )
{
if (m_layout->m_gcmAllocType == kAllocPs3gcmTextureData0)
{
m_layout->m_gcmAllocType = kAllocPs3gcmTextureData;
CPs3gcmAllocationType_t uAllocationType = m_layout->m_gcmAllocType;
m_lmBlock.Alloc( uAllocationType, m_layout->m_storageTotalSize );
}
}
if ( m_layout->IsTiledMemory() )
{
if ( uAllocationType == kAllocPs3gcmDepthBuffer )
{
bool bIs16BitDepth = ( m_layout->GetFormatPtr()->m_gcmFormat == CELL_GCM_TEXTURE_DEPTH16 ) || ( m_layout->m_nFormat == CELL_GCM_TEXTURE_DEPTH16_FLOAT );
uint32 zcullSize[2] = { AlignValue( key.m_size[0], 64 ), AlignValue( key.m_size[1], 64 ) };
uint32 uiZcullIndex = m_lmBlock.ZcullMemoryIndex();
cellGcmBindZcull( uiZcullIndex,
m_lmBlock.Offset(),
zcullSize[0], zcullSize[1],
m_lmBlock.ZcullMemoryStart(),
bIs16BitDepth ? CELL_GCM_ZCULL_Z16 : CELL_GCM_ZCULL_Z24S8,
CELL_GCM_SURFACE_CENTER_1,
CELL_GCM_ZCULL_LESS,
CELL_GCM_ZCULL_LONES,
CELL_GCM_SCULL_SFUNC_ALWAYS,
0, 0 // sRef, sMask
);
uint32 uiTileIndex = m_lmBlock.TiledMemoryIndex();
cellGcmSetTileInfo( uiTileIndex, CELL_GCM_LOCATION_LOCAL, m_lmBlock.Offset(),
m_layout->m_storageTotalSize, m_layout->DefaultPitch(), bIs16BitDepth ? CELL_GCM_COMPMODE_DISABLED : CELL_GCM_COMPMODE_Z32_SEPSTENCIL_REGULAR,
m_lmBlock.TiledMemoryTagAreaBase(), // The area base + size/0x10000 will be allocated as the tag area.
1 ); // Misc depth buffers on bank 1
cellGcmBindTile( uiTileIndex );
}
else if ( uAllocationType == kAllocPs3gcmColorBufferMisc )
{
uint32 uiTileIndex = m_lmBlock.TiledMemoryIndex();
cellGcmSetTileInfo( uiTileIndex, CELL_GCM_LOCATION_LOCAL, m_lmBlock.Offset(),
m_layout->m_storageTotalSize, m_layout->DefaultPitch(), CELL_GCM_COMPMODE_DISABLED,
m_lmBlock.TiledMemoryTagAreaBase(), // The area base + size/0x10000 will be allocated as the tag area.
1 ); // Tile misc color buffers on bank 1
cellGcmBindTile( uiTileIndex );
}
}
#ifdef _DEBUG
memset( Data(), 0, m_layout->m_storageTotalSize ); // initialize texture data to BLACK in DEBUG
#endif
return true;
}

View File

@@ -0,0 +1,250 @@
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
// Texture Layout, CPs3gcmTexture, and CPs3gcmTextureData_t
//
//==================================================================================================
#ifndef INCLUDED_GCMTEXTURE_H
#define INCLUDED_GCMTEXTURE_H
#include "ps3/ps3_platform.h"
#include "ps3gcmmemory.h"
#include "gcmstate.h"
//--------------------------------------------------------------------------------------------------
// Literals
//--------------------------------------------------------------------------------------------------
#define PS3_TEX_MAX_FORMAT_COUNT 48
#define PS3_TEX_CANONICAL_FORMAT_COUNT 19
//--------------------------------------------------------------------------------------------------
// Texture layout, texture etc..
//--------------------------------------------------------------------------------------------------
// Immutable, refcounted description of a texture's memory layout on RSX:
// the hash key (format/size/flags), the derived per-slice offset/size table
// and the GCM allocation pool the texel storage should come from. Instances
// are created and cached by CPs3gcmTextureLayout::New and shared between all
// textures with an identical key.
struct ALIGN16 CPs3gcmTextureLayout
{
#ifndef _CERT
char *m_layoutSummary; // for debug visibility
#endif
// format mapping description
struct ALIGN16 Format_t
{
#ifndef _CERT
char *m_formatSummary; // for debug visibility
#endif
enum GcmCaps_t
{
kCapSRGB = (1<<0), // GCM can sample it as SRGB
kCap4xBlocks = (1<<1), // Pitch is referring to 4 texel blocks and not single texel blocks (DXT)
};
D3DFORMAT m_d3dFormat; // what D3D knows it as; see public/bitmap/imageformat.h
uint32 m_gcmRemap; // GCM remap mask
uint16 m_gcmPitchPer4X; // GCM pitch multiplier per every 4 pixels of width
uint8 m_gcmFormat; // GCM format
uint8 m_gcmCaps; // GCM caps of this texture
}
ALIGN16_POST;
// const inputs used for hashing
// NOTE: keys are compared bytewise (memcmp) by the layout cache's less-func
struct Key_t
{
D3DFORMAT m_texFormat; // D3D texel format
uint16 m_size[3]; // dimensions of the base mip
uint8 m_texFlags; // mipped, autogen mips, render target, ... ?
uint8 m_nActualMipCount; // Actual number of mips; on console builds, we typically drop the smallest (highest index)
// mips to save space (they waste a lot of space for page-alignment reasons)
// high-bit 0x80 indicates cubemap
};
// layout flags
enum Flags_t
{
kfDynamicNoSwizzle = (1<<0), // Indicates whether this texture needs to keep a backing store for incremental updates.
// (On PS3 this will prevent texture from being swizzled to allow CPU writes at subrect offsets)
kfMip = (1<<1),
kfMipAuto = (1<<2),
kfTypeRenderable = (1<<3),
kfTypeDepthStencil = (1<<4),
kfTypeCubeMap = (1<<5),
kfSrgbEnabled = (1<<6),
kfNoD3DMemory = (1<<7), // Allocation of storage for the bits has been deferred (call IDirect3DDevice9::AllocateTextureStorage to do the allocation)
// -!!--!!- DO NOT ADD MORE FLAGS -!!--!!- (m_texFlags is only 8 bits)
};
// slice information (one entry per face/mip combination)
struct Slice_t
{
uint32 m_storageOffset; //where in the storage slab does this slice live
uint32 m_storageSize; //how much storage does this slice occupy
uint16 m_size[3]; //texel dimensions of this slice
};
//
// Structure definition
//
Key_t m_key; // key of the layout
int32 mutable m_refCount; // refcount
uint32 m_storageTotalSize; // size of storage slab required
uint16 m_nFormat; // format specific info; index in g_ps3texFormats table
uint8 m_mipCount; // derived by starting at base size and working down towards 1x1
CPs3gcmAllocationType_t mutable m_gcmAllocType; // type of GCM allocation to determine pool/alignment/etc.
#ifndef SPU
// slice array
Slice_t m_slices[0]; // dynamically allocated 2-d array [faces][mips]
public:
inline int SlicePitch( int iSlice ) const;
inline int DefaultPitch() const;
inline const Format_t * GetFormatPtr()const;
#endif
public:
// Swizzled layout requires power-of-two dimensions and is disabled for
// dynamic and renderable textures
inline bool IsSwizzled() const { return !( m_key.m_texFlags & ( kfDynamicNoSwizzle | kfTypeRenderable ) ) && IsPowerOfTwo( m_key.m_size[0] ) && IsPowerOfTwo( m_key.m_size[1] ) && IsPowerOfTwo( m_key.m_size[2] ); }
inline bool IsCubeMap() const { return !!(m_key.m_texFlags & kfTypeCubeMap); }
inline bool IsVolumeTex() const { return !!(m_key.m_size[2] > 1); }
// Tiled memory: renderable but NOT dynamic
inline bool IsTiledMemory() const { return (m_key.m_texFlags & ( kfTypeRenderable | kfDynamicNoSwizzle )) == kfTypeRenderable; }
inline int FaceCount() const { return ( !IsCubeMap() ) ? 1 : 6; }
inline int MipCount() const { return ( m_key.m_texFlags & kfMip ) ? m_key.m_nActualMipCount : 1; }
// SPU-friendly variants take explicit slice/format table pointers instead of
// touching the PPU globals
inline int SlicePitch2( int iSlice, const Slice_t* pSlices, const Format_t *pTexFormats ) const{ return !IsTiledMemory() ? ( ( IsSwizzled() ? pSlices[iSlice].m_size[0] : m_key.m_size[0] ) * pTexFormats[m_nFormat].m_gcmPitchPer4X / 4 ) : pTexFormats[m_nFormat].m_gcmPitchPer4X; }
inline int DefaultPitch2( const Format_t *pTexFormats ) const { return !IsTiledMemory() ? m_key.m_size[0] * pTexFormats[m_nFormat].m_gcmPitchPer4X / 4 : pTexFormats[m_nFormat].m_gcmPitchPer4X; }
inline int SliceIndex( int face, int mip ) const { return mip + ( face * MipCount() ); }
public:
#ifndef SPU
static CPs3gcmTextureLayout const * New( Key_t const &k );
void Release() const;
#endif
}
ALIGN16_POST;
extern CPs3gcmTextureLayout::Format_t g_ps3texFormats[PS3_TEX_MAX_FORMAT_COUNT];
extern uint g_nPs3texFormatCount;
#ifndef SPU
// convenience functions on PPU that use implicit tables always accessible on PPU
inline int CPs3gcmTextureLayout::SlicePitch( int iSlice ) const
{
	// PPU-side convenience wrapper: forwards to SlicePitch2 using the
	// globally accessible slice array and format table
	int nPitchBytes = SlicePitch2( iSlice, &m_slices[0], g_ps3texFormats );
	return nPitchBytes;
}
inline int CPs3gcmTextureLayout::DefaultPitch() const
{
	// PPU-side convenience wrapper around DefaultPitch2 with the global format table
	return DefaultPitch2( &g_ps3texFormats[0] );
}
inline const CPs3gcmTextureLayout::Format_t * CPs3gcmTextureLayout::GetFormatPtr() const
{
	// Resolve this layout's format index into the global format table
	return g_ps3texFormats + m_nFormat;
}
#endif
// A texture instance: a shared refcounted layout plus the local-memory block
// that holds (or will eventually hold, see kfNoD3DMemory) the texel data.
struct ALIGN16 CPs3gcmTexture
{
CPs3gcmTextureLayout const *m_layout; // this structure persists. see CPs3gcmTextureLayout::Release( it asserts if refcount goes down to zero )
ALIGN16 CPs3gcmLocalMemoryBlock m_lmBlock ALIGN16_POST; // this structure has the Offset, and the texture bits at that offset persist until all Draw calls are made that use it
// RSX offset of the texel storage; only valid once the block is allocated
inline uint32 Offset()const { Assert( m_lmBlock.Size() ); return m_lmBlock.Offset(); }
#ifndef SPU
// CPU-visible address of the texel storage (resolved via the block's pool)
inline char * Data() { Assert( m_lmBlock.Size() ); return m_lmBlock.DataInAnyMemory(); }
#endif
public:
#ifndef SPU
static CPs3gcmTexture * New( CPs3gcmTextureLayout::Key_t const &key );
void Release();
bool Allocate();
#endif
}
ALIGN16_POST;
// POD mirror of a CPs3gcmTexture suitable for sharing with the SPU: the
// layout pointer stored as a raw 32-bit effective address plus the local
// memory offset of the texel bits. Either both fields are set or both are 0.
struct CPs3gcmTextureData_t
{
// CPs3gcmTextureLayout const *m_eaLayout
uint32 m_eaLayout; // this structure persists. see CPs3gcmTextureLayout::Release( it asserts if refcount goes down to zero )
uint32 m_nLocalOffset; // the offset of the texture bits
// Capture layout address + storage offset from a texture (NULL resets both).
void Assign( const CPs3gcmTexture * pThat )
{
if( pThat )
{
m_eaLayout = ( uint32 )pThat->m_layout;
m_nLocalOffset = pThat->Offset();
// Layout must be 16-byte aligned and both fields non-zero together
Assert( m_eaLayout ? !( 15 & ( uintp( m_eaLayout ) | m_nLocalOffset ) ) && m_nLocalOffset : !m_nLocalOffset );
}
else
{
Reset();
}
}
inline uint32 Offset()const { return m_nLocalOffset; }
void Reset()
{
m_eaLayout = 0;
m_nLocalOffset = 0;
}
bool IsNull()const
{
return !NotNull();
}
bool NotNull()const
{
// either both are null, or none is null
Assert( ( m_eaLayout == 0 ) == ( m_nLocalOffset == 0 ) );
return m_eaLayout != 0;
}
operator bool() const { return NotNull(); }
};
//
// CPs3BindTexture_t : Everything we need to bind a texture
//
// This is what the SPU needs to bind the texture
struct CPs3BindTexture_t
{
uint8 m_sampler; // sampler slot this texture is bound to
uint8 m_nBindFlags;
uint8 m_UWrap; // U/V/W addressing modes
uint8 m_VWrap;
uint8 m_WWrap;
uint8 m_minFilter; // minification / magnification / mip filters
uint8 m_magFilter;
uint8 m_mipFilter;
uint32 m_nLayout; // NOTE(review): presumably the CPs3gcmTextureLayout address as a uint32 (cf. CPs3gcmTextureData_t::m_eaLayout) — confirm at pack site
CPs3gcmLocalMemoryBlock *m_pLmBlock; // memory block holding the texel data
int m_boundStd;
int m_hTexture;
};
// This is what we store when asked to bind a texture
// When the cmd buffer is executed, at this time we lookup
// the remaining fields and pack some CPs3BindTexture_t to actually use on the SPU
struct CPs3BindParams_t
{
uint16 m_nBindTexIndex; // NOTE(review): presumably an index used to look up the full CPs3BindTexture_t when the cmd buffer executes — confirm at use site
uint8 m_sampler; // sampler slot
uint8 m_nBindFlags;
int m_boundStd;
int m_hTexture;
};
#endif // INCLUDED_GCMTEXTURE_H

View File

@@ -0,0 +1,12 @@
//===== Copyright (c) 1996-2008, Valve Corporation, All rights reserved. ======//

// Pixel shader input: interpolated texture coordinate
struct PS_IN
{
	float2 TexCoord : TEXCOORD;
};

sampler detail : register( s0 );

// Passthrough pixel shader: output the detail texture sample unmodified
float4 main( PS_IN In ) : COLOR
{
	float4 vColor = tex2D( detail, In.TexCoord );
	return vColor;
}

View File

@@ -0,0 +1,23 @@
//===== Copyright (c) 1996-2008, Valve Corporation, All rights reserved. ======//

// Combined world-view-projection matrix, starting at constant register c0
float4x4 matWVP : register(c0);

// Vertex input: object-space position and one UV set
struct VS_IN
{
	float4 ObjPos : POSITION;
	float2 TexCoord : TEXCOORD;
};

// Vertex output: clip-space position plus the pass-through UV
struct VS_OUT
{
	float4 ProjPos : POSITION;
	float2 TexCoord : TEXCOORD;
};

// Transform the vertex to clip space and forward the UV unchanged
VS_OUT main( VS_IN In )
{
	VS_OUT vsOut;
	vsOut.ProjPos = mul( In.ObjPos, matWVP );
	vsOut.TexCoord = In.TexCoord;
	return vsOut;
}

View File

@@ -0,0 +1,13 @@
//===== Copyright (c) 1996-2008, Valve Corporation, All rights reserved. ======//

// Pixel shader input: interpolated UV
struct PS_IN
{
	float2 TexCoord : TEXCOORD;
};

sampler detail : register( s0 );

// Emit the detail texture sample with no further processing
float4 main( PS_IN In ) : COLOR
{
	float4 vSampled = tex2D( detail, In.TexCoord );
	return vSampled;
}

View File

@@ -0,0 +1,54 @@
//===== Copyright (c) 1996-2008, Valve Corporation, All rights reserved. ======//

// Pixel shader input: interpolated UV
struct PS_IN
{
	float2 TexCoord : TEXCOORD;
};

// Convert an sRGB gamma-space value in [0,1] to linear space
// (standard sRGB piecewise curve: linear toe below 0.04045, power 2.4 above)
float SrgbGammaToLinear( float flSrgbGammaValue )
{
	float flClamped = saturate( flSrgbGammaValue );
	if ( flClamped <= 0.04045f )
	{
		return flClamped / 12.92f;
	}
	return pow( ( flClamped + 0.055f ) / 1.055f, 2.4f );
}
// Convert a linear-space value in [0,1] to Xbox 360 piecewise-linear gamma
// space: four linear segments with breakpoints at 64/1023, 128/1023 and
// 512/1023 approximating the gamma curve.
// NOTE(review): currently unused in this shader — main() below samples the
// texture without any gamma conversion.
float X360LinearToGamma( float flLinearValue )
{
float fl360GammaValue;
flLinearValue = saturate( flLinearValue );
if ( flLinearValue < ( 128.0f / 1023.0f ) )
{
if ( flLinearValue < ( 64.0f / 1023.0f ) )
{
// Segment 1: [0, 64/1023)
fl360GammaValue = flLinearValue * ( 1023.0f * ( 1.0f / 255.0f ) );
}
else
{
// Segment 2: [64/1023, 128/1023)
fl360GammaValue = flLinearValue * ( ( 1023.0f / 2.0f ) * ( 1.0f / 255.0f ) ) + ( 32.0f / 255.0f );
}
}
else
{
if ( flLinearValue < ( 512.0f / 1023.0f ) )
{
// Segment 3: [128/1023, 512/1023)
fl360GammaValue = flLinearValue * ( ( 1023.0f / 4.0f ) * ( 1.0f / 255.0f ) ) + ( 64.0f / 255.0f );
}
else
{
// Segment 4: [512/1023, 1], clamped to 1
fl360GammaValue = flLinearValue * ( ( 1023.0f /8.0f ) * ( 1.0f / 255.0f ) ) + ( 128.0f /255.0f );
if ( fl360GammaValue > 1.0f )
{
fl360GammaValue = 1.0f;
}
}
}
fl360GammaValue = saturate( fl360GammaValue );
return fl360GammaValue;
}
sampler detail : register( s0 );

// Passthrough pixel shader; the gamma helpers above are not applied here
float4 main( PS_IN In ) : COLOR
{
	return tex2D( detail, In.TexCoord );
}

View File

@@ -0,0 +1,941 @@
//========== Copyright (c) 2010, Valve Corporation, All rights reserved. ========
#include "dxabstract.h"
#include "ps3gcmstate.h"
#include "utlmap.h"
#include "ps3/ps3gcmlabels.h"
#include "sys/tty.h"
#include "convar.h"
//#include "vjobs/spudrawqueue_shared.h"
#include "spugcm.h"
#include "memdbgon.h"
PLATFORM_OVERRIDE_MEM_ALLOC_INTERNAL_PS3_IMPL
//////////////////////////////////////////////////////////////////////////
#if 1 // #ifndef _CERT
#define TRACK_ALLOC_STATS 1
#endif
#ifdef GCMLOCALMEMORYBLOCKDEBUG
ConVar r_ps3_gcmnocompact( "r_ps3_gcmnocompact", "0" );
ConVar r_ps3_gcmlowcompact( "r_ps3_gcmlowcompact", "0" );
#endif
static CThreadFastMutex s_AllocMutex;
static int32 s_uiGcmLocalMemoryAllocatorMutexLockCount;
struct CGcmLocalMemoryAllocatorMutexLockCounter_t
{
	// RAII depth counter: incremented for the lifetime of each allocator
	// mutex hold; consulted by IsItSafeToRefreshFrontBufferNonInteractivePs3.
	CGcmLocalMemoryAllocatorMutexLockCounter_t()
	{
		Assert( s_uiGcmLocalMemoryAllocatorMutexLockCount >= 0 );
		s_uiGcmLocalMemoryAllocatorMutexLockCount ++;
	}
	~CGcmLocalMemoryAllocatorMutexLockCounter_t()
	{
		Assert( s_uiGcmLocalMemoryAllocatorMutexLockCount > 0 );
		s_uiGcmLocalMemoryAllocatorMutexLockCount --;
	}
};
#define PS3ALLOCMTX AUTO_LOCK( s_AllocMutex ); CGcmLocalMemoryAllocatorMutexLockCounter_t aLockCounter;
bool IsItSafeToRefreshFrontBufferNonInteractivePs3()
{
	// NOTE: only main thread can refresh front buffer
	if ( !ThreadInMainThread() )
		return false;

	// Safe only while no code path is currently inside the local memory allocator
	AUTO_LOCK( s_AllocMutex );
	Assert( s_uiGcmLocalMemoryAllocatorMutexLockCount >= 0 );
	bool bAllocatorIdle = ( s_uiGcmLocalMemoryAllocatorMutexLockCount <= 0 );
	return bAllocatorIdle;
}
// Allocator-internal view of CPs3gcmLocalMemoryBlock that exposes writable
// references to fields the public block type keeps read-only.
struct CPs3gcmLocalMemoryBlockMutable : public CPs3gcmLocalMemoryBlock
{
inline uint32 & MutableOffset() { return m_nLocalMemoryOffset; }
inline uint32 & MutableSize() { return m_uiSize; }
inline CPs3gcmAllocationType_t & MutableType() { return m_uType; }
inline uint32 & MutableIndex() { return m_uiIndex; }
};
#ifdef GCMLOCALMEMORYBLOCKDEBUG
static const uint64 g_GcmLocalMemoryBlockDebugCookieAllocated = 0xA110CA7EDA110CA7ull;
static const uint64 g_GcmLocalMemoryBlockDebugCookieFree = 0xFEEFEEFEEFEEFEEFllu;
#endif
// Allocator for one pool of RSX-visible memory; one instance exists per
// allocation pool (see the g_ps3gcmLocalMemoryAllocator array below).
// Frees are deferred via a GPU fence label so memory is only reused after
// RSX is done with it.
struct CPs3gcmLocalMemoryAllocator
{
//////////////////////////////////////////////////////////////////////////
//
// Allocated memory tracking
//
uint32 m_nOffsetMin; // RSX Local Memory allocated by Initialization that will never be released
uint32 m_nOffsetMax; // Ceiling of allocatable RSX Local Memory (because the top portion is reserved for zcull/etc.), top portion managed separately
uint32 m_nOffsetUnallocated; // RSX Local Memory offset of not yet allocated memory (between Min and Max)
CUtlVector< CPs3gcmLocalMemoryBlockMutable * > m_arrAllocations; // Sorted array of all allocations
//////////////////////////////////////////////////////////////////////////
//
// Free blocks tracking
//
struct LocalMemoryAllocation_t
{
CPs3gcmLocalMemoryBlockMutable m_block;
uint32 m_uiFenceNumber; // fence value recorded when the block was freed
LocalMemoryAllocation_t *m_pNext; // singly-linked list link
};
LocalMemoryAllocation_t *m_pPendingFreeBlock; // freed by CPU, possibly still referenced by RSX
LocalMemoryAllocation_t *m_pFreeBlock; // NOTE(review): presumably fence-completed blocks ready for reuse (see Reclaim/FindFreeBlock) — confirm in their bodies
static uint32 sm_uiFenceNumber; // monotonically issued fence counter (shared by all pools)
uint32 m_uiFenceLastKnown; // last fence value this pool observed
static uint32 volatile *sm_puiFenceLocation; // RSX-written label (GCM_LABEL_MEMORY_FREE, see Ps3gcmLocalMemoryAllocator_Init)
//////////////////////////////////////////////////////////////////////////
//
// Implementation
//
inline bool Alloc( CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock );
inline void Free( CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock );
inline uint32 Reclaim( bool bForce = false );
inline void Compact();
// Helper methods
inline LocalMemoryAllocation_t * FindFreeBlock( uint32 uiAlignBytes, uint32 uiSize );
inline bool IsFenceCompleted( uint32 uiCurrentFenceValue, uint32 uiCheckStoredFenceValue );
inline void TrackAllocStats( CPs3gcmAllocationType_t uAllocType, int nDelta );
#ifdef GCMLOCALMEMORYBLOCKDEBUG
inline void ValidateAllBlocks();
#endif
}
g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolCount];
uint32 CPs3gcmLocalMemoryAllocator::sm_uiFenceNumber;
uint32 volatile * CPs3gcmLocalMemoryAllocator::sm_puiFenceLocation;
// RSX memory usage stats tracking:
static GPUMemoryStats g_RsxMemoryStats;
// Per-pool byte counters backing the GPU memory stats report
// (presumably maintained by TrackAllocStats — its body is not visible here)
struct GPUMemoryStats_Pool
{
int nDefaultPoolSize;
int nDefaultPoolUsed;
int nRTPoolUsed;
int nDynamicPoolUsed;
int nMainMemUsed;
int nUnknownPoolUsed;
};
GPUMemoryStats_Pool g_RsxMemoryStats_Pool;
static inline uint32 Ps3gcmHelper_ComputeTiledAreaMemorySize( uint32 nCount, uint32 w, uint32 h, uint32 bpp )
{
	// One tiled surface = hardware tiled pitch for a w*bpp row, with the
	// height padded to 32 lines; multiply by the surface count.
	uint32 nPitch = cellGcmGetTiledPitchSize( w * bpp );
	uint32 uiTotal = nPitch * AlignValue( h, 32 ) * nCount;
	// Round the whole region up to the misc color buffer allocation alignment
	return AlignValue( uiTotal, PS3GCMALLOCATIONALIGN( kAllocPs3gcmColorBufferMisc ) );
}
void Ps3gcmLocalMemoryAllocator_Init()
{
PS3ALLOCMTX
if ( !CPs3gcmLocalMemoryAllocator::sm_puiFenceLocation )
{
CPs3gcmLocalMemoryAllocator::sm_puiFenceLocation = cellGcmGetLabelAddress( GCM_LABEL_MEMORY_FREE );
*CPs3gcmLocalMemoryAllocator::sm_puiFenceLocation = 0;
}
// Pool boundaries
uint32 uiGcmAllocBegin = g_ps3gcmGlobalState.m_nLocalBaseOffset;
uint32 uiGcmAllocEnd = uiGcmAllocBegin + g_ps3gcmGlobalState.m_nLocalSize;
// Memory should be allocated for large frame buffers
uint32 uiMemorySizeBuffer[2] = { MAX( 1280, g_ps3gcmGlobalState.m_nRenderSize[0] ), MAX( 720, g_ps3gcmGlobalState.m_nRenderSize[1] ) };
uint32 uiFactor[2] = { uiMemorySizeBuffer[0]*uiMemorySizeBuffer[1], 1280*720 };
// Configuration of pool memory (can be #ifdef'd for every game)
static const uint32 s_PoolMemoryLayout[/*kGcmAllocPoolCount*/] =
{
#if defined( CSTRIKE15 )
// mhansen - We had to adjust the memory values a bit for cstrike15 to get a map to load
// PS3_BUILDFIX - We need to revisit this to determine the proper size later on
// mdonofrio - render target allocations revisited for PS3
// potential to save some more (~12Mb) from TiledColourFB (only need two really.
// wait for other rendering optimisation/rework to be finished first before attempting.
/*kGcmAllocPoolDefault = */ 0,
/*kGcmAllocPoolDynamicNewPath = */ 5 * 1024 * 1024, // 5 MB
/*kGcmAllocPoolDynamic = */ 11 * 1024 * 1024, // 11 MB
/*kGcmAllocPoolTiledColorFB = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2 + CPs3gcmDisplay::SURFACE_COUNT, uiMemorySizeBuffer[0], uiMemorySizeBuffer[1], 4 ), // 3 buffers allocated in CreateRSXBuffers + 2 _rt_fullFrameFB - can probably get this down to 2 if we 1. don't use MLAA and 2. we clean up the post-pro rendering to use the front buffer as a textureand 3. tidy up aliasing for rt_fullframeFB and rt_fullFrameFB1
/*kGcmAllocPoolTiledColorFBQ = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2, uiMemorySizeBuffer[0]/4, uiMemorySizeBuffer[1]/4, 4 ), // fits 2 1/4 size framebuffer textures
/*kGcmAllocPoolTiledColor512 = */ 0,
/*kGcmAllocPoolTiledColorMisc = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 1, 640, 640, 4 ) + Ps3gcmHelper_ComputeTiledAreaMemorySize( 2, 1024, 512, 4) + Ps3gcmHelper_ComputeTiledAreaMemorySize(1, 32, 32, 4), // // 1x 1/2 size smoke/fog buffer, 2xWater(1024x512x32bpp), EyeGlint(32x32x32bpp), *Monitor(256x256x32bpp), *RTTFlashlightShadows(864x864x8bpp) - * we don't need these for CS15
/*kGcmAllocPoolTiledD24S8 = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2, uiMemorySizeBuffer[0], uiMemorySizeBuffer[1], 4 ), // only 2 depth buffer targets required (current and saved off), + reserve space for 1/2 size depth buffer for smoke/fog
/*kGcmAllocPoolMainMemory = */ 0, // configured based on mapped IO memory
/*kGcmAllocPoolMallocMemory = */ 0, // using malloc
#else
/*kGcmAllocPoolDefault = */ 0,
/*kGcmAllocPoolDynamicNewPath = */ 5 * 1024 * 1024, // 5 MB
/*kGcmAllocPoolDynamic = */ 10 * 1024 * 1024, // 10 MB
/*kGcmAllocPoolTiledColorFB = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2 * CPs3gcmDisplay::SURFACE_COUNT, uiMemorySizeBuffer[0], uiMemorySizeBuffer[1], 4 ), // fits 6 of full framebuffer textures
/*kGcmAllocPoolTiledColorFBQ = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 4, uiMemorySizeBuffer[0]/4, uiMemorySizeBuffer[1]/4, 4 ), // fits 4 quarters of framebuffer textures
/*kGcmAllocPoolTiledColor512 = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2, 512, 512, 4 ), // fits 2 512x512 RGBA textures
/*kGcmAllocPoolTiledColorMisc = */ 5 * 1024 * 1024, // 5 MB
/*kGcmAllocPoolTiledD24S8 = */ uint64( 15 * 1024 * 1024 ) * uiFactor[0]/uiFactor[1], // 15 MB
/*kGcmAllocPoolMainMemory = */ 0, // configured based on mapped IO memory
/*kGcmAllocPoolMallocMemory = */ 0, // using malloc
#endif
};
COMPILE_TIME_ASSERT( ARRAYSIZE( s_PoolMemoryLayout ) == ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ) );
for ( int j = ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ); j -- > 0; )
{
const uint32 uiSize = AlignValue( s_PoolMemoryLayout[j], 1024 * 1024 ); // Align it on 1 MB boundaries, all our pools are large
g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMax = uiGcmAllocEnd;
uiGcmAllocEnd -= uiSize;
g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMin =
g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetUnallocated = uiGcmAllocEnd;
}
// Default pool setup (rest of local memory)
g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetMax = uiGcmAllocEnd;
g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetMin =
g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetUnallocated = uiGcmAllocBegin;
// Main memory mapped pool
g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ].m_nOffsetMin =
g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ].m_nOffsetUnallocated = uint32( g_ps3gcmGlobalState.m_pRsxMainMemoryPoolBuffer ) + g_ps3gcmGlobalState.m_nIoOffsetDelta;
g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ].m_nOffsetMax = g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ].m_nOffsetMin + g_ps3gcmGlobalState.m_nRsxMainMemoryPoolBufferSize;
// Store initial capacity for memory stats tracking:
g_RsxMemoryStats.nGPUMemSize = g_ps3gcmGlobalState.m_nLocalSize;
g_RsxMemoryStats_Pool.nDefaultPoolSize = g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetMin;
//
// Setup preset tiled regions
//
{
CPs3gcmAllocationPool_t ePool = kGcmAllocPoolTiledColorFB;
uint8 uiBank = 0; // bank 0..3
uint32 nRenderPitch = cellGcmGetTiledPitchSize( g_ps3gcmGlobalState.m_nRenderSize[0] * 4 );
uint8 uiTileIndex = ePool - kGcmAllocPoolTiledColorFB;
cellGcmSetTileInfo( uiTileIndex, CELL_GCM_LOCATION_LOCAL,
g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
nRenderPitch, CELL_GCM_COMPMODE_DISABLED,
( g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolTiledColorFB ].m_nOffsetMin ) / 0x10000, // The area base + size/0x10000 will be allocated as the tag area.
uiBank );
cellGcmBindTile( uiTileIndex );
}
{
CPs3gcmAllocationPool_t ePool = kGcmAllocPoolTiledColorFBQ;
uint8 uiBank = 1; // bank 0..3
uint32 nRenderPitch = cellGcmGetTiledPitchSize( g_ps3gcmGlobalState.m_nRenderSize[0] * 4 / 4 );
uint8 uiTileIndex = ePool - kGcmAllocPoolTiledColorFB;
cellGcmSetTileInfo( uiTileIndex, CELL_GCM_LOCATION_LOCAL,
g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
nRenderPitch, CELL_GCM_COMPMODE_DISABLED,
( g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolTiledColorFB ].m_nOffsetMin ) / 0x10000, // The area base + size/0x10000 will be allocated as the tag area.
uiBank );
cellGcmBindTile( uiTileIndex );
}
{
CPs3gcmAllocationPool_t ePool = kGcmAllocPoolTiledColor512;
uint8 uiBank = 2; // bank 0..3
uint32 nRenderPitch = cellGcmGetTiledPitchSize( 512 * 4 );
uint8 uiTileIndex = ePool - kGcmAllocPoolTiledColorFB;
cellGcmSetTileInfo( uiTileIndex, CELL_GCM_LOCATION_LOCAL,
g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
nRenderPitch, CELL_GCM_COMPMODE_DISABLED,
( g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolTiledColorFB ].m_nOffsetMin ) / 0x10000, // The area base + size/0x10000 will be allocated as the tag area.
uiBank );
cellGcmBindTile( uiTileIndex );
}
#ifndef _CERT
static const char * s_PoolMemoryNames[] =
{
/*kGcmAllocPoolDefault = */ "Default Pool",
/*kGcmAllocPoolDynamicNewPath = */ "Dynamic New ",
/*kGcmAllocPoolDynamic = */ "Dynamic IBVB",
/*kGcmAllocPoolTiledColorFB = */ "FullFrameRTs",
/*kGcmAllocPoolTiledColorFBQ = */ "1/4Frame RTs",
/*kGcmAllocPoolTiledColor512 = */ "512x512 RTs ",
/*kGcmAllocPoolTiledColorMisc = */ "All Misc RTs",
/*kGcmAllocPoolTiledD24S8 = */ "DepthStencil",
/*kGcmAllocPoolMainMemory = */ "Main Memory ",
/*kGcmAllocPoolMallocMemory = */ "MallocMemory",
};
COMPILE_TIME_ASSERT( ARRAYSIZE( s_PoolMemoryNames ) == ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ) );
Msg( "RSX Local Memory layout:\n" );
for ( int j = 0; j < ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ); ++ j )
{
Msg( " %s 0x%08X - 0x%08X [ %9.3f MB ]\n",
s_PoolMemoryNames[j],
g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMin,
g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMax,
(g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMin) / 1024.f / 1024.f );
}
Msg( "Total size: %d MB\n", g_ps3gcmGlobalState.m_nLocalSize / 1024 / 1024 );
#endif
}
void Ps3gcmLocalMemoryAllocator_Reclaim()
{
PS3ALLOCMTX
for ( int k = 0; k < ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ); ++ k )
g_ps3gcmLocalMemoryAllocator[ k ].Reclaim();
}
// Compacts every local-memory pool. Requires full RSX/PPU synchronization:
// the GPU is drained before blocks are moved, and drained again afterwards so
// that the RSX-side data transfers queued by Compact() have completed before
// callers resume rendering.
void Ps3gcmLocalMemoryAllocator_Compact()
{
// Set to 1 to emit timing/size info for the compaction to the TTY
#define PS3GCMCOMPACTPROFILE 0
#if PS3GCMCOMPACTPROFILE
float flTimeStart = Plat_FloatTime();
uint32 uiFree = g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated;
#endif
// Let RSX wait for final flip
GCM_FUNC( cellGcmSetWaitFlip );
// Let PPU wait for all RSX commands done (include waitFlip)
g_ps3gcmGlobalState.CmdBufferFinish();
#if PS3GCMCOMPACTPROFILE
float flTimeWait = Plat_FloatTime() - flTimeStart;
#endif
{
// Compact all pools while holding the allocator mutex; each Compact()
// queues RSX data transfers that relocate the surviving blocks.
PS3ALLOCMTX
for ( int k = 0; k < ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ); ++ k )
{
g_ps3gcmLocalMemoryAllocator[ k ].Compact();
}
}
#if PS3GCMCOMPACTPROFILE
float flTimePrepareTransfer = Plat_FloatTime() - flTimeStart;
#endif
// Wait for all RSX memory to be transferred
g_ps3gcmGlobalState.CmdBufferFinish();
#if PS3GCMCOMPACTPROFILE
float flTimeDone = Plat_FloatTime() - flTimeStart;
char chBuffer[64];
Q_snprintf( chBuffer, ARRAYSIZE( chBuffer ), "COMPACT: %0.3f / %0.3f / %0.3f sec\n",
flTimeWait, flTimePrepareTransfer, flTimeDone );
uint32 dummy;
sys_tty_write( SYS_TTYP6, chBuffer, Q_strlen( chBuffer ), &dummy );
Q_snprintf( chBuffer, ARRAYSIZE( chBuffer ), "COMPACT: %0.3f -> %0.3f MB (%0.3f MB free)\n",
uiFree / 1024.f / 1024.f, g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated / 1024.f / 1024.f,
(g_ps3gcmLocalMemoryAllocator[0].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated) / 1024.f / 1024.f );
sys_tty_write( SYS_TTYP6, chBuffer, Q_strlen( chBuffer ), &dummy );
#endif
}
void Ps3gcmLocalMemoryAllocator_CompactWithReason( char const *szReason )
{
double flTimeCompactStart = Plat_FloatTime();
DevMsg( "====== GCM LOCAL MEMORY COMPACT : %s =====\n", szReason );
uint32 uiFreeMemoryBeforeCompact = g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated;
DevMsg( "RSX Local Memory Free: %0.3f MB; compacting...\n", (g_ps3gcmLocalMemoryAllocator[0].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated) / 1024.f / 1024.f );
Ps3gcmLocalMemoryAllocator_Compact();
DevMsg( "RSX Local Memory Compacted %0.3f MB in %0.3f sec\n",
(uiFreeMemoryBeforeCompact - g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated) / 1024.f / 1024.f,
Plat_FloatTime() - flTimeCompactStart );
DevMsg( "RSX Local Memory Free: %0.3f MB\n", (g_ps3gcmLocalMemoryAllocator[0].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated) / 1024.f / 1024.f );
}
// Allocates this block from the pool encoded in its allocation type.
// Returns false only if the owning pool reports exhaustion (main-memory pool);
// see CPs3gcmLocalMemoryAllocator::Alloc for the full contract.
bool CPs3gcmLocalMemoryBlock::Alloc()
{
PS3ALLOCMTX
// Cast grants the allocator write access to this block's private fields
return g_ps3gcmLocalMemoryAllocator[PS3GCMALLOCATIONPOOL(m_uType)].Alloc( reinterpret_cast< CPs3gcmLocalMemoryBlockMutable * >( this ) );
}
// Returns this block to its owning pool. The memory is not reusable
// immediately: it is fenced and reclaimed once RSX has passed the free label.
void CPs3gcmLocalMemoryBlock::Free()
{
PS3ALLOCMTX
// Cast grants the allocator write access to this block's private fields
g_ps3gcmLocalMemoryAllocator[PS3GCMALLOCATIONPOOL(m_uType)].Free( reinterpret_cast< CPs3gcmLocalMemoryBlockMutable * >( this ) );
}
//////////////////////////////////////////////////////////////////////////
//
// Private implementation of PS3 local memory allocator
//
// Allocates local-memory (or malloc-pool) space for pBlock inside this pool.
// Strategy, in order:
//   1) reuse a reclaimed free block of suitable size/alignment (FindFreeBlock),
//   2) carve new space from the unallocated tail of the pool,
//   3) if the pool is full: stall on RSX fences to reclaim pending frees,
//      then (once) compact the pool, retrying the allocation each time.
// The malloc-backed pool bypasses all of the above and uses aligned malloc.
// Returns false only for the main-memory pool when exhausted (so the caller
// can fall back to a local-memory pool); any other exhaustion is fatal.
// Note: fix in this revision — removed the unused local 'uiOldUnallocatedEdge'
// (dead store, served no purpose).
inline bool CPs3gcmLocalMemoryAllocator::Alloc( CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock )
{
TrackAllocStats( pBlock->MutableType(), pBlock->MutableSize() );
uint32 uAlignBytes = PS3GCMALLOCATIONALIGN( pBlock->MutableType() );
Assert( IsPowerOfTwo( uAlignBytes ) );
double flAllocatorStallTime = 0.0f;
// When true, the "compact on low memory" path has already run (or is disabled)
bool bCompactPerformed = true;
#ifdef GCMLOCALMEMORYBLOCKDEBUG
bCompactPerformed = !r_ps3_gcmlowcompact.GetBool();
#endif
retry_allocation:
// Try to find a free block
if ( LocalMemoryAllocation_t *pFreeBlock = FindFreeBlock( uAlignBytes, pBlock->MutableSize() ) )
{
// Reuse the recycled block's offset and allocation-table slot
pBlock->MutableOffset() = pFreeBlock->m_block.MutableOffset();
pBlock->MutableIndex() = pFreeBlock->m_block.MutableIndex();
#ifdef GCMLOCALMEMORYBLOCKDEBUG
if ( m_arrAllocations[ pBlock->MutableIndex() ] != &pFreeBlock->m_block )
Error( "<vitaliy> GCM Local Memory Allocator Error (attempt to reuse invalid free block)!" );
#endif
m_arrAllocations[ pBlock->MutableIndex() ] = reinterpret_cast< CPs3gcmLocalMemoryBlockMutable * >( pBlock );
delete pFreeBlock;
}
else if ( this != &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMallocMemory ] )
{
// Allocate new block from the unallocated tail, aligned up as required
uint32 uiFreeBlock = ( m_nOffsetUnallocated + uAlignBytes - 1 ) & ~( uAlignBytes - 1 );
// Check if there's enough space in this pool for the requested block
if ( uiFreeBlock + pBlock->MutableSize() > m_nOffsetMax )
{
// There's not enough space in this pool
if ( m_pPendingFreeBlock )
{
// There are pending free blocks, we just need to wait for
// RSX to finish rendering using them
if ( !flAllocatorStallTime )
{
// First stall: record start time and kick RSX so fences advance
flAllocatorStallTime = Plat_FloatTime();
g_ps3gcmGlobalState.CmdBufferFlush( CPs3gcmGlobalState::kFlushForcefully );
}
while ( Reclaim() < pBlock->MutableSize() && m_pPendingFreeBlock )
{
ThreadSleep( 1 );
}
goto retry_allocation;
}
else if ( !bCompactPerformed )
{
// Last-ditch: defragment the pool once, then retry.
// Let PPU wait for all RSX commands done
g_ps3gcmGlobalState.CmdBufferFinish();
uint32 uiFragmentedFreeSpace = m_nOffsetMax - m_nOffsetUnallocated;
for ( LocalMemoryAllocation_t *pFreeFragment = m_pFreeBlock; pFreeFragment; pFreeFragment = pFreeFragment->m_pNext )
uiFragmentedFreeSpace += pFreeFragment->m_block.MutableSize();
Warning(
"**************** GCM LOCAL MEMORY LOW *****************\n"
"<vitaliy> GCM Local Memory Allocator#%d pool compacting!\n"
" Requested allocation %u bytes.\n"
" Pool capacity %u bytes.\n"
" Free fragmented space %u bytes.\n"
" Unallocated %u bytes.\n"
" Used %u bytes.\n",
this - g_ps3gcmLocalMemoryAllocator,
( uint32 ) pBlock->MutableSize(),
m_nOffsetMax - m_nOffsetMin,
uiFragmentedFreeSpace,
m_nOffsetMax - m_nOffsetUnallocated,
m_nOffsetUnallocated - m_nOffsetMin
);
Compact();
Warning( " ---> Compacted pool#%d has %u unallocated bytes.\n",
this - g_ps3gcmLocalMemoryAllocator,
m_nOffsetMax - m_nOffsetUnallocated );
bCompactPerformed = true;
// Wait for all RSX memory to be transferred
g_ps3gcmGlobalState.CmdBufferFinish();
goto retry_allocation;
}
else
{
// Main memory pool returns failure so caller can try local pool.
if (this == &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ]) return false;
uint32 uiFragmentedFreeSpace = m_nOffsetMax - m_nOffsetUnallocated;
for ( LocalMemoryAllocation_t *pFreeFragment = m_pFreeBlock; pFreeFragment; pFreeFragment = pFreeFragment->m_pNext )
uiFragmentedFreeSpace += pFreeFragment->m_block.MutableSize();
Error(
"********* OUT OF GCM LOCAL MEMORY ********************\n"
"<vitaliy> GCM Local Memory Allocator#%d pool exhausted!\n"
" Failed allocation %u bytes.\n"
" Pool capacity %u bytes.\n"
" Free fragmented space %u bytes.\n"
" Unallocated %u bytes.\n"
" Used %u bytes.\n",
this - g_ps3gcmLocalMemoryAllocator,
( uint32 ) pBlock->MutableSize(),
m_nOffsetMax - m_nOffsetMin,
uiFragmentedFreeSpace,
m_nOffsetMax - m_nOffsetUnallocated,
m_nOffsetUnallocated - m_nOffsetMin
);
}
}
// update the pointer to "unallocated" realm
m_nOffsetUnallocated = uiFreeBlock + pBlock->MutableSize();
// this is the last allocation so far
pBlock->MutableIndex() = m_arrAllocations.AddToTail( reinterpret_cast< CPs3gcmLocalMemoryBlockMutable * >( pBlock ) );
pBlock->MutableOffset() = uiFreeBlock;
}
else
{
// Malloc-backed pool: the "offset" is actually a main-memory pointer,
// and the block is not tracked in m_arrAllocations (index == ~0).
MEM_ALLOC_CREDIT_( "GCM Malloc Pool" );
void *pvMallocMemory = MemAlloc_AllocAligned( pBlock->MutableSize(), uAlignBytes );
pBlock->MutableOffset() = (uint32) pvMallocMemory;
pBlock->MutableIndex() = ~0;
}
// Account any time spent stalled waiting for RSX fences
if ( flAllocatorStallTime )
g_ps3gcmGlobalState.m_flAllocatorStallTimeWaitingRSX += Plat_FloatTime() - flAllocatorStallTime;
#ifdef GCMLOCALMEMORYBLOCKDEBUG
// PS3 doesn't allow more than 8 zcull regions (index 0..7)
if ( g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledD24S8].m_arrAllocations.Count() > 8 )
Error( "PS3 number of zcull regions exceeded!\n" );
// PS3 doesn't allow more than 15 tiles regions (index 0..14)
if ( g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledD24S8].m_arrAllocations.Count() +
g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorMisc].m_arrAllocations.Count() +
( kGcmAllocPoolTiledColorMisc - kGcmAllocPoolTiledColorFB )
> 15 )
Error( "PS3 number of tiled regions exceeded!\n" );
pBlock->m_dbgGuardCookie = g_GcmLocalMemoryBlockDebugCookieAllocated;
#endif
return true;
}
// Queues pBlock for deferred release. The block's state is copied into a
// LocalMemoryAllocation_t stamped with a new fence number, and an RSX
// write-back label is emitted; Reclaim() makes the memory reusable only
// after RSX reports that fence, guaranteeing the GPU is done with the data.
inline void CPs3gcmLocalMemoryAllocator::Free( CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock )
{
#ifdef GCMLOCALMEMORYBLOCKDEBUG
// Guard against double-free / freeing a block this pool never allocated
if ( !pBlock ||
pBlock->m_dbgGuardCookie != g_GcmLocalMemoryBlockDebugCookieAllocated ||
( ( pBlock->MutableIndex() != ~0 ) && ( m_arrAllocations[ pBlock->MutableIndex() ] != pBlock ) ) )
{
//DebuggerBreak();
Error( "<vitaliy> Attempt to free not allocated GCM local memory block!" );
}
pBlock->m_dbgGuardCookie = g_GcmLocalMemoryBlockDebugCookieFree;
#endif
// Record the deallocation at the head of the pending-free list; the RSX
// label below must be written with the same fence number just assigned.
LocalMemoryAllocation_t *pDealloc = new LocalMemoryAllocation_t;
pDealloc->m_block = *pBlock;
pDealloc->m_uiFenceNumber = ++ sm_uiFenceNumber;
pDealloc->m_pNext = m_pPendingFreeBlock;
GCM_FUNC( cellGcmSetWriteBackEndLabel, GCM_LABEL_MEMORY_FREE, sm_uiFenceNumber );
m_pPendingFreeBlock = pDealloc;
TrackAllocStats( pBlock->MutableType(), - pBlock->MutableSize() );
// Malloc-pool blocks (index == ~0) have no allocation-table slot to patch
if ( pBlock->MutableIndex() != ~0 )
{
#ifdef GCMLOCALMEMORYBLOCKDEBUG
if ( m_arrAllocations[ pBlock->MutableIndex() ] != pBlock )
Error( "<vitaliy> GCM Local Memory Allocator Error (freeing block that is not properly registered)!" );
#endif
// The pending-free copy now represents this slot until it is reclaimed
m_arrAllocations[ pBlock->MutableIndex() ] = &pDealloc->m_block;
}
#ifdef GCMLOCALMEMORYBLOCKDEBUG
// Poison the caller's block so stale use is detectable
pBlock->MutableOffset() = ~0;
pBlock->MutableIndex() = ~0;
#endif
}
// Returns true if RSX has passed the stored fence value. Both values are
// rebased against m_uiFenceLastKnown via unsigned subtraction, which keeps
// the comparison correct when the 32-bit fence counter wraps around.
inline bool CPs3gcmLocalMemoryAllocator::IsFenceCompleted( uint32 uiCurrentFenceValue, uint32 uiCheckStoredFenceValue )
{
#if GCM_ALLOW_NULL_FLIPS
// With null flips the GPU never consumes anything, so every fence is "done"
extern bool g_ps3_nullflips;
if ( g_ps3_nullflips )
return true;
#endif
// Needs to handle the counter wrapping around
return ( ( uiCurrentFenceValue - m_uiFenceLastKnown ) >= ( uiCheckStoredFenceValue - m_uiFenceLastKnown ) );
}
// Moves pending-free blocks whose RSX fence has completed onto the reusable
// free list (or, for the malloc pool, frees their memory outright), and
// recomputes each reclaimed block's usable size from its neighbor's offset.
// bForce = true reclaims everything regardless of fence state (only safe when
// RSX is known to be idle). Returns the largest block size reclaimed.
inline uint32 CPs3gcmLocalMemoryAllocator::Reclaim( bool bForce )
{
uint32 uiLargestBlockSizeReclaimed = 0;
uint32 uiCurrentFenceValue = *sm_puiFenceLocation;
// Walk pending free blocks and see if they are no longer
// in use by RSX:
LocalMemoryAllocation_t **p = &m_pPendingFreeBlock;
if ( !bForce ) while ( (*p) && !IsFenceCompleted( uiCurrentFenceValue, (*p)->m_uiFenceNumber ) )
p = &( (*p)->m_pNext );
// Now p is pointing to the chain of free blocks
// chain that has been completed (due to the nature of
// pushing new deallocation at the head of the pending
// list)
if ( *p )
{
// Detach the completed suffix of the pending list
LocalMemoryAllocation_t *pCompletedChain = *p;
*p = NULL; // Terminate the chain
// Handle the special case of malloc reclaim - free all memory
if ( this == &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMallocMemory ] )
{
MEM_ALLOC_CREDIT_( "GCM Malloc Pool" );
for ( LocalMemoryAllocation_t *pActualFree = pCompletedChain; pActualFree; )
{
MemAlloc_FreeAligned( pActualFree->m_block.DataInMallocMemory() );
LocalMemoryAllocation_t *pDelete = pActualFree;
pActualFree = pActualFree->m_pNext;
delete pDelete;
}
pCompletedChain = NULL;
}
// Relink the completed pending chain into
// the free blocks chain
LocalMemoryAllocation_t **ppFree = &m_pFreeBlock;
while ( *ppFree )
ppFree = &( (*ppFree)->m_pNext );
*ppFree = pCompletedChain;
// Recompute actual free sizes of the completed chain
// Actual free size is the delta between block offset and next block offset
// When there's no next block then its delta between block offset and unallocated edge
for ( LocalMemoryAllocation_t *pActualFree = pCompletedChain; pActualFree; pActualFree = pActualFree->m_pNext )
{
uint32 uiIdx = pActualFree->m_block.MutableIndex() + 1;
uint32 uiNextOffset = m_nOffsetUnallocated;
if ( uiIdx < m_arrAllocations.Count() )
{
CPs3gcmLocalMemoryBlockMutable * RESTRICT pNextBlock = m_arrAllocations[ uiIdx ];
uiNextOffset = pNextBlock->Offset();
}
uint32 uiActualBlockSize = uiNextOffset - pActualFree->m_block.Offset();
pActualFree->m_block.MutableSize() = uiActualBlockSize;
uiLargestBlockSizeReclaimed = MAX( uiLargestBlockSizeReclaimed, uiActualBlockSize );
}
}
// Remember the last known fence value
m_uiFenceLastKnown = uiCurrentFenceValue;
#ifdef GCMLOCALMEMORYBLOCKDEBUG
ValidateAllBlocks();
#endif
return uiLargestBlockSizeReclaimed;
}
// Best-fit search of the free list: picks the smallest block whose size is in
// [uiSize, uiSize*11/10] and whose offset already satisfies uiAlignBytes
// (power of two). On success the node is unlinked from the free list and
// returned (caller owns/deletes it); returns NULL when no block qualifies.
inline CPs3gcmLocalMemoryAllocator::LocalMemoryAllocation_t * CPs3gcmLocalMemoryAllocator::FindFreeBlock( uint32 uiAlignBytes, uint32 uiSize )
{
LocalMemoryAllocation_t *ppBest = NULL;
uint32 uiSizeMax = uiSize * 11/10; // we don't want to inflate requested size by > 10%
for ( LocalMemoryAllocation_t **p = &m_pFreeBlock;
(*p);
p = &( (*p)->m_pNext ) )
{
if ( (*p)->m_block.MutableSize() >= uiSize && (*p)->m_block.MutableSize() <= uiSizeMax &&
!( (*p)->m_block.Offset() & ( uiAlignBytes - 1 ) ) )
{
// '<=' means later equally-sized candidates win; keeps best (smallest) fit
if ( !ppBest || ( (*p)->m_block.MutableSize() <= (*ppBest)->m_block.MutableSize() ) )
{
ppBest = p;
}
}
}
if ( ppBest )
{
// Unlink the winner from the free list before handing it back
LocalMemoryAllocation_t *pFree = (*ppBest);
(*ppBest) = pFree->m_pNext;
pFree->m_pNext = NULL;
return pFree;
}
return NULL;
}
// Folds a signed size delta into the per-pool usage counter for the pool this
// allocation type maps to, refreshes the "GPU memory free" figure, and
// returns true when the pool lives in RSX local memory (false = main memory).
inline bool TrackAllocStats_Pool( CPs3gcmAllocationType_t uAllocType, int nDelta )
{
	CPs3gcmAllocationPool_t nPool = PS3GCMALLOCATIONPOOL( uAllocType );
	bool bResidesInRsxMemory = true;
	int *pnUsed;
	if ( nPool == kGcmAllocPoolDefault )
	{
		pnUsed = &g_RsxMemoryStats_Pool.nDefaultPoolUsed;
	}
	else if ( nPool == kGcmAllocPoolDynamicNewPath || nPool == kGcmAllocPoolDynamic )
	{
		pnUsed = &g_RsxMemoryStats_Pool.nDynamicPoolUsed;
	}
	else if ( nPool == kGcmAllocPoolTiledColorFB || nPool == kGcmAllocPoolTiledColorFBQ ||
			  nPool == kGcmAllocPoolTiledColor512 || nPool == kGcmAllocPoolTiledColorMisc ||
			  nPool == kGcmAllocPoolTiledD24S8 )
	{
		pnUsed = &g_RsxMemoryStats_Pool.nRTPoolUsed;
	}
	else if ( nPool == kGcmAllocPoolMainMemory || nPool == kGcmAllocPoolMallocMemory )
	{
		// kGcmAllocPoolMainMemory is unused unless PS3GCM_VBIB_IN_IO_MEMORY is 1
		pnUsed = &g_RsxMemoryStats_Pool.nMainMemUsed;
		bResidesInRsxMemory = false; // In main memory!
	}
	else
	{
		pnUsed = &g_RsxMemoryStats_Pool.nUnknownPoolUsed;
	}
	*pnUsed += nDelta;
	Assert( 0 <= (int)*pnUsed );
	// Report free memory only from the default pool (the other pools are pre-sized to fixed limits, and all
	// geom/textures go into the default pool, so that's where content-driven variation/failures will occur)
	g_RsxMemoryStats.nGPUMemFree = g_RsxMemoryStats_Pool.nDefaultPoolSize - g_RsxMemoryStats_Pool.nDefaultPoolUsed;
	return bResidesInRsxMemory;
}
// Folds a signed size delta into the per-category RSX memory statistics
// (render targets / textures / VB / IB / unknown). Compiled out unless
// TRACK_ALLOC_STATS is set; pool-level stats are updated first, and types
// living in main memory are excluded from the RSX category counters.
inline void CPs3gcmLocalMemoryAllocator::TrackAllocStats( CPs3gcmAllocationType_t uAllocType, int nDelta )
{
#if TRACK_ALLOC_STATS
	// Early-out for allocations not in RSX memory:
	if ( !TrackAllocStats_Pool( uAllocType, nDelta ) )
		return;
	unsigned int *pnCategory;
	if ( uAllocType == kAllocPs3gcmColorBufferMisc || uAllocType == kAllocPs3gcmColorBufferFB ||
		 uAllocType == kAllocPs3gcmColorBufferFBQ || uAllocType == kAllocPs3gcmColorBuffer512 ||
		 uAllocType == kAllocPs3gcmDepthBuffer )
	{
		pnCategory = &g_RsxMemoryStats.nRTSize;
	}
	else if ( uAllocType == kAllocPs3gcmTextureData || uAllocType == kAllocPs3gcmTextureData0 )
	{
		pnCategory = &g_RsxMemoryStats.nTextureSize;
	}
	else if ( uAllocType == kAllocPs3GcmVertexBuffer )
	{
		pnCategory = &g_RsxMemoryStats.nVBSize;
	}
	else if ( uAllocType == kAllocPs3GcmIndexBuffer )
	{
		pnCategory = &g_RsxMemoryStats.nIBSize;
	}
	else
	{
		// Shaders, EDGE geom buffers, dynamic/DMA VB+IB pools and anything
		// else are deliberately lumped into the misc/unknown bucket unless
		// they become big or variable enough to warrant their own counter.
		pnCategory = &g_RsxMemoryStats.nUnknown;
	}
	*pnCategory += nDelta;
	Assert( 0 <= (int)*pnCategory );
#endif // TRACK_ALLOC_STATS
}
#ifdef GCMLOCALMEMORYBLOCKDEBUG
// Debug-only invariant checker: verifies guard cookies, index patch-backs,
// offset ordering, and that every freed slot in m_arrAllocations is accounted
// for by exactly one node on the pending-free or free list.
#define VALIDATECONDITION( x ) if( !( x ) ) { Error( "<vitaliy> GCM Local Memory Allocation block %p index %d is corrupt [line %d]!\n", pBlock, k, __LINE__ ); }
inline void CPs3gcmLocalMemoryAllocator::ValidateAllBlocks()
{
// Traverse the allocated list and validate debug guards and patch-back indices
CUtlVector< uint32 > arrFreeBlocksIdx;
uint32 uiLastAllocatedOffset = m_nOffsetMin;
for ( int k = 0, kEnd = m_arrAllocations.Count(); k < kEnd; ++ k )
{
CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock = m_arrAllocations[k];
VALIDATECONDITION( pBlock );
VALIDATECONDITION( pBlock->m_dbgGuardCookie == g_GcmLocalMemoryBlockDebugCookieAllocated || pBlock->m_dbgGuardCookie == g_GcmLocalMemoryBlockDebugCookieFree );
VALIDATECONDITION( pBlock->MutableIndex() < m_arrAllocations.Count() );
VALIDATECONDITION( pBlock->MutableIndex() == k );
VALIDATECONDITION( m_arrAllocations[ pBlock->MutableIndex() ] == pBlock );
VALIDATECONDITION( pBlock->Offset() >= uiLastAllocatedOffset );
uiLastAllocatedOffset = pBlock->Offset() + pBlock->MutableSize();
VALIDATECONDITION( uiLastAllocatedOffset <= m_nOffsetMax );
// Remember freed slots; the free-list walk below must consume all of them
if ( pBlock->m_dbgGuardCookie == g_GcmLocalMemoryBlockDebugCookieFree )
arrFreeBlocksIdx.AddToTail( k );
}
// Traverse free lists and validate
LocalMemoryAllocation_t * arrFree[] = { m_pPendingFreeBlock, m_pFreeBlock };
for ( int j = 0; j < ARRAYSIZE( arrFree ); ++ j )
for ( LocalMemoryAllocation_t *p = arrFree[j]; p; p = p->m_pNext )
{
// 'k' feeds the VALIDATECONDITION error message
int k = j;
CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock = &p->m_block;
VALIDATECONDITION( pBlock );
VALIDATECONDITION( pBlock->m_dbgGuardCookie == g_GcmLocalMemoryBlockDebugCookieFree );
k = pBlock->MutableIndex();
if ( pBlock->MutableIndex() != ~0 )
{
VALIDATECONDITION( pBlock->MutableIndex() < m_arrAllocations.Count() );
VALIDATECONDITION( m_arrAllocations[ pBlock->MutableIndex() ] == pBlock );
VALIDATECONDITION( arrFreeBlocksIdx.FindAndFastRemove( pBlock->MutableIndex() ) );
}
}
// Any leftover index means a freed slot with no free-list node backing it
int k = 0;
void *pBlock = 0;
VALIDATECONDITION( !arrFreeBlocksIdx.Count() );
}
#endif
// Defragments the pool: forcibly reclaims every pending free, discards the
// free list, then re-allocates every live block back-to-back from the bottom
// of the pool and queues RSX local-memory transfers to move each block's data
// from its old offset to its new one. REQUIRES RSX and PPU rendering to be
// fully stopped (callers bracket this with CmdBufferFinish()).
inline void CPs3gcmLocalMemoryAllocator::Compact()
{
GCM_PERF_PUSH_MARKER( "LocalMemory:Compact" );
#ifdef GCMLOCALMEMORYBLOCKDEBUG
ValidateAllBlocks();
if ( r_ps3_gcmnocompact.GetBool() )
return;
#endif
// Reclaim all memory (NOTE: all pending blocks must be reclaimed since both RSX and PPU have stopped rendering!)
Reclaim();
#ifdef GCMLOCALMEMORYBLOCKDEBUG
if ( m_pPendingFreeBlock )
Warning( "GCM Local Memory Allocator Compact forces pending free blocks to be reclaimed.\n" );
ValidateAllBlocks();
#endif
// Anything still pending had an unpassed fence; force-reclaim it now
if ( m_pPendingFreeBlock )
Reclaim( true );
#ifdef GCMLOCALMEMORYBLOCKDEBUG
if ( m_pPendingFreeBlock )
Error( "<vitaliy> GCM Local Memory Allocator Compact requires RSX and PPU rendering to be paused! (pending free blocks have not been reclaimed)\n" );
ValidateAllBlocks();
#endif
// Walk the free blocks chain and patch-back NULL pointers into allocation tracking system
while ( m_pFreeBlock )
{
LocalMemoryAllocation_t *p = m_pFreeBlock;
m_pFreeBlock = p->m_pNext;
m_arrAllocations[ p->m_block.MutableIndex() ] = NULL;
delete p;
}
Assert( !m_pFreeBlock && !m_pPendingFreeBlock );
// These are elements requiring reallocation
uint32 uiCount = m_arrAllocations.Count();
CPs3gcmLocalMemoryBlockMutable **pReallocationBlocks = m_arrAllocations.Base();
// Here "correct" implementation would be to copy off m_arrAllocations vector onto stack for iteration,
// RemoveAll from m_arrAllocations vector and allocate all blocks again.
// We will cheat since we know that we will allocate same number of elements and directly write zero
// into m_arrAllocations m_Size member, then we will still be able to use the memory of the vector
// for reading blocks requiring compact reallocation, and AddToTail will still fill the vector with
// correct data.
struct AllocatorCompactVectorCheat : public CUtlVector< CPs3gcmLocalMemoryBlockMutable * > { inline void ResetCountPreservingMemoryContents() { m_Size = 0; } };
( ( AllocatorCompactVectorCheat * ) ( char * ) &m_arrAllocations )->ResetCountPreservingMemoryContents();
m_nOffsetUnallocated = m_nOffsetMin;
// Prepare RSX for data buffer transfers in local memory
uint nTransferMode = ( ( this - &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ] ) < kGcmAllocPoolMainMemory ) ? CELL_GCM_TRANSFER_LOCAL_TO_LOCAL : CELL_GCM_TRANSFER_MAIN_TO_MAIN;
Assert( nTransferMode < 4 );
GCM_FUNC( cellGcmSetTransferDataMode, nTransferMode ); // unnecessary if we do this on SPU
Assert( !g_spuGcm.IsDeferredDrawQueue() );
// Reallocate all blocks
for ( ; uiCount; -- uiCount, ++ pReallocationBlocks )
{
CPs3gcmLocalMemoryBlockMutable *pBlock = *pReallocationBlocks;
// NULL slots are the freed blocks discarded above
if ( !pBlock )
continue;
uint32 nOldOffset = pBlock->Offset();
// Alloc() re-adds the stats, so subtract first to keep counters balanced
TrackAllocStats( pBlock->MutableType(), - pBlock->MutableSize() );
Alloc( pBlock );
// Block didn't move - no data transfer needed
if ( nOldOffset == pBlock->Offset() )
continue;
// Have RSX transfer blocks data. RSX may hang if there's WriteLabel between the Format and Offset commands,
// so reserve space for both of them up front
SpuDrawTransfer_t * pTransfer = g_spuGcm.GetDrawQueue()->AllocWithHeader<SpuDrawTransfer_t>( SPUDRAWQUEUE_TRANSFER_METHOD | nTransferMode );
pTransfer->m_nLineSize = pBlock->MutableSize();
pTransfer->m_nOldOffset = nOldOffset;
pTransfer->m_nNewOffset = pBlock->Offset();
}
#ifdef GCMLOCALMEMORYBLOCKDEBUG
ValidateAllBlocks();
#endif
GCM_PERF_MARKER( "Compact:Complete" );
}
//////////////////////////////////////////////////////////////////////////
//
// Computation of tiled memory
//
// Returns this block's base inside the RSX tile tag area, in 64KB (0x10000)
// units. Preset color pools (FB, FBQ, 512) get fixed bases in pool order
// starting at the FB pool; misc color tiles follow at their actual offset;
// depth tiles are packed downward from the top of the 0x800-entry tag range.
uint32 CPs3gcmLocalMemoryBlock::TiledMemoryTagAreaBase() const
{
CPs3gcmAllocationPool_t ePool = PS3GCMALLOCATIONPOOL(m_uType);
if ( ePool == kGcmAllocPoolTiledColorMisc ) // Misc color tiles are placed at the front of tag area after preset pools
return ( Offset() - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorFB].m_nOffsetMin ) / 0x10000;
if ( ePool == kGcmAllocPoolTiledD24S8 ) // Depth tiles are placed in the end of tag area (0-0x7FF is offset range)
return 0x800 - ( Offset() - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledD24S8].m_nOffsetMin + m_uiSize ) / 0x10000;
// NOTE: the FB expression below is identically zero (x - x) by construction:
// the FB pool is the base the whole tag area is measured from.
if ( ePool == kGcmAllocPoolTiledColorFB ) // FB color tiles go first
return ( g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorFB].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorFB].m_nOffsetMin ) / 0x10000;
if ( ePool == kGcmAllocPoolTiledColorFBQ ) // FBQ color tiles go next
return ( g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorFBQ].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorFB].m_nOffsetMin ) / 0x10000;
if ( ePool == kGcmAllocPoolTiledColor512 ) // 512 color tiles go next
return ( g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColor512].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorFB].m_nOffsetMin ) / 0x10000;
#ifdef GCMLOCALMEMORYBLOCKDEBUG
Error( "<vitaliy> Cannot compute tiled memory tag base from a non-tiled-pool allocation!\n" );
#endif
return ~0;
}
// Returns this block's hardware tile index. The three preset color pools
// (FB, FBQ, 512) own fixed indices 0..2 in pool order; misc color tiles
// follow at 3 + allocation index; depth tiles count down from index 14
// (the hardware's last usable tile slot).
uint32 CPs3gcmLocalMemoryBlock::TiledMemoryIndex() const
{
	CPs3gcmAllocationPool_t nPool = PS3GCMALLOCATIONPOOL( m_uType );
	switch ( nPool )
	{
	case kGcmAllocPoolTiledColorMisc:
		// Misc color tiles start right after the preset pool tiles
		return m_uiIndex + kGcmAllocPoolTiledColorMisc - kGcmAllocPoolTiledColorFB;
	case kGcmAllocPoolTiledD24S8:
		// Depth tiles occupy the last slots, packed backwards from 14
		return 14 - m_uiIndex;
	default:
		// Preset pools map directly to their fixed tile index
		return nPool - kGcmAllocPoolTiledColorFB;
	}
}
// Returns this block's zcull region index. Only depth/stencil (D24S8)
// allocations own zcull regions; their allocation index doubles as the
// zcull index. Any other pool is a caller error.
uint32 CPs3gcmLocalMemoryBlock::ZcullMemoryIndex() const
{
	if ( PS3GCMALLOCATIONPOOL( m_uType ) != kGcmAllocPoolTiledD24S8 )
	{
#ifdef GCMLOCALMEMORYBLOCKDEBUG
		Error( "<vitaliy> Cannot compute zcull index from a non-zcull allocation!\n" );
#endif
		return ~0;
	}
	// Depth tiles are the only zcull tiles
	return m_uiIndex;
}
// Returns this block's start inside zcull memory. Only depth/stencil (D24S8)
// allocations have zcull storage; zcull uses 1 byte per pixel vs D24S8's
// 4 bytes, hence the /4 (alignment is implicit: offsets are 64KB aligned).
uint32 CPs3gcmLocalMemoryBlock::ZcullMemoryStart() const
{
	if ( PS3GCMALLOCATIONPOOL( m_uType ) != kGcmAllocPoolTiledD24S8 )
	{
#ifdef GCMLOCALMEMORYBLOCKDEBUG
		Error( "<vitaliy> Cannot compute zcull memory start from a non-zcull allocation!\n" );
#endif
		return ~0;
	}
	// 1 byte per pixel, D24S8 is 4 bytes per pixel, implicitly 4096 aligned because offset is 64Kb aligned
	return ( Offset() - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledD24S8].m_nOffsetMin ) / 4;
}
//////////////////////////////////////////////////////////////////////////
//
// Allow shaderapi to query GPU memory stats:
//
// Copies the module-wide RSX memory statistics snapshot out to the caller
// (shaderapi's query entry point).
void GetGPUMemoryStats( GPUMemoryStats &stats )
{
stats = g_RsxMemoryStats;
}

View File

@@ -0,0 +1,995 @@
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
// GCM memory allocation mgmt
//
//==================================================================================================
#include "utlmap.h"
#include "sys/tty.h"
#include "convar.h"
#include "ps3gcmmemory.h"
#include "gcmlabels.h"
#include "gcmstate.h"
#include "gcmdrawstate.h"
#include "memdbgon.h"
PLATFORM_OVERRIDE_MEM_ALLOC_INTERNAL_PS3_IMPL
// Bytes reserved for the 64x64 ARGB hardware mouse cursor surface
#define HARDWARE_CURSOR_SIZE (64*64*4)
//--------------------------------------------------------------------------------------------------
// GCM memory allocators
//--------------------------------------------------------------------------------------------------
// Alloc-stats tracking is currently forced on in all builds (was _CERT-gated)
#if 1 // #ifndef _CERT
#define TRACK_ALLOC_STATS 1
#endif
#ifdef GCMLOCALMEMORYBLOCKDEBUG
// Debug knobs: disable compaction entirely / allow one compact on low memory
ConVar r_ps3_gcmnocompact( "r_ps3_gcmnocompact", "0" );
ConVar r_ps3_gcmlowcompact( "r_ps3_gcmlowcompact", "1" );
#endif
// Single mutex guarding all GCM local-memory allocator state, plus a counter
// of how many PS3ALLOCMTX scopes are currently alive (see below)
static CThreadFastMutex s_AllocMutex;
static int32 s_uiGcmLocalMemoryAllocatorMutexLockCount;
// RAII helper that tracks how deeply the allocator mutex is held; the counter
// lets IsItSafeToRefreshFrontBufferNonInteractivePs3() detect in-progress
// allocator work without blocking.
struct CGcmLocalMemoryAllocatorMutexLockCounter_t
{
CGcmLocalMemoryAllocatorMutexLockCounter_t() { Assert( s_uiGcmLocalMemoryAllocatorMutexLockCount >= 0 ); ++ s_uiGcmLocalMemoryAllocatorMutexLockCount; }
~CGcmLocalMemoryAllocatorMutexLockCounter_t() { Assert( s_uiGcmLocalMemoryAllocatorMutexLockCount > 0 ); -- s_uiGcmLocalMemoryAllocatorMutexLockCount; }
};
// Lock the allocator mutex for the enclosing scope and bump the lock counter
#define PS3ALLOCMTX AUTO_LOCK( s_AllocMutex ); CGcmLocalMemoryAllocatorMutexLockCounter_t aLockCounter;
// Returns true when the non-interactive front-buffer refresh may run:
// we must be on the main thread and no PS3ALLOCMTX scope may be active.
bool IsItSafeToRefreshFrontBufferNonInteractivePs3()
{
// NOTE: only main thread can refresh front buffer
if ( !ThreadInMainThread() )
return false;
// Take the mutex (without the PS3ALLOCMTX counter) so the count is stable
AUTO_LOCK( s_AllocMutex );
Assert( s_uiGcmLocalMemoryAllocatorMutexLockCount >= 0 );
return s_uiGcmLocalMemoryAllocatorMutexLockCount <= 0;
}
// Allocator-internal view of CPs3gcmLocalMemoryBlock that exposes writable
// references to its otherwise-immutable fields; blocks are reinterpret_cast
// to this type inside the allocator only.
struct CPs3gcmLocalMemoryBlockMutable : public CPs3gcmLocalMemoryBlock
{
inline uint32 & MutableOffset() { return m_nLocalMemoryOffset; }
inline uint32 & MutableSize() { return m_uiSize; }
inline CPs3gcmAllocationType_t & MutableType() { return m_uType; }
inline uint32 & MutableIndex() { return m_uiIndex; }
};
#ifdef GCMLOCALMEMORYBLOCKDEBUG
static const uint64 g_GcmLocalMemoryBlockDebugCookieAllocated = 0xA110CA7EDA110CA7ull;
static const uint64 g_GcmLocalMemoryBlockDebugCookieFree = 0xFEEFEEFEEFEEFEEFllu;
#endif
// Per-pool bump allocator with fenced deferred free, best-fit reuse and
// stop-the-world compaction. One instance exists per CPs3gcmAllocationPool_t
// (see g_ps3gcmLocalMemoryAllocator below).
struct CPs3gcmLocalMemoryAllocator
{
//////////////////////////////////////////////////////////////////////////
//
// Allocated memory tracking
//
uint32 m_nOffsetMin; // RSX Local Memory allocated by Initialization that will never be released
uint32 m_nOffsetMax; // Ceiling of allocatable RSX Local Memory (because the top portion is reserved for zcull/etc.), top portion managed separately
uint32 m_nOffsetUnallocated; // RSX Local Memory offset of not yet allocated memory (between Min and Max)
CUtlVector< CPs3gcmLocalMemoryBlockMutable * > m_arrAllocations; // Sorted array of all allocations
//////////////////////////////////////////////////////////////////////////
//
// Free blocks tracking
//
// A freed block: a copy of the block's state plus the RSX fence that must
// pass before its memory may be reused
struct LocalMemoryAllocation_t
{
CPs3gcmLocalMemoryBlockMutable m_block;
uint32 m_uiFenceNumber;
LocalMemoryAllocation_t *m_pNext;
};
LocalMemoryAllocation_t *m_pPendingFreeBlock; // freed, but RSX may still be reading them (newest first)
LocalMemoryAllocation_t *m_pFreeBlock; // fence passed; reusable by FindFreeBlock
static uint32 sm_uiFenceNumber; // monotonically increasing fence counter (shared by all pools)
uint32 m_uiFenceLastKnown; // last fence value observed; baseline for wraparound-safe compares
static uint32 volatile *sm_puiFenceLocation; // RSX-written label the fence values are read from
//////////////////////////////////////////////////////////////////////////
//
// Implementation
//
inline bool Alloc( CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock );
inline void Free( CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock );
inline uint32 Reclaim( bool bForce = false );
inline void Compact();
// Helper methods
inline LocalMemoryAllocation_t * FindFreeBlock( uint32 uiAlignBytes, uint32 uiSize );
inline bool IsFenceCompleted( uint32 uiCurrentFenceValue, uint32 uiCheckStoredFenceValue );
inline void TrackAllocStats( CPs3gcmAllocationType_t uAllocType, int nDelta );
#ifdef GCMLOCALMEMORYBLOCKDEBUG
inline void ValidateAllBlocks();
#endif
}
// One allocator instance per allocation pool, indexed by CPs3gcmAllocationPool_t
g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolCount];
uint32 CPs3gcmLocalMemoryAllocator::sm_uiFenceNumber = 1;
uint32 volatile * CPs3gcmLocalMemoryAllocator::sm_puiFenceLocation;
// RSX memory usage stats tracking:
static GPUMemoryStats g_RsxMemoryStats;
// Per-pool breakdown of RSX/main memory usage in bytes, maintained by
// TrackAllocStats_Pool().
struct GPUMemoryStats_Pool
{
	int nDefaultPoolSize;	// capacity of the default pool
	int nDefaultPoolUsed;	// bytes currently allocated from the default pool
	int nRTPoolUsed;		// bytes in the tiled render-target pools
	int nDynamicPoolUsed;	// bytes in the dynamic VB/IB pools
	int nMainMemUsed;		// bytes in main-memory-backed pools
	int nUnknownPoolUsed;	// bytes from any unrecognized pool
};
GPUMemoryStats_Pool g_RsxMemoryStats_Pool;
// Computes how many bytes of RSX local memory are required to hold nCount
// tiled color surfaces of w x h pixels at bpp bytes per pixel.  The total is
// rounded up to the tiled-color allocation alignment.
static inline uint32 Ps3gcmHelper_ComputeTiledAreaMemorySize( uint32 nCount, uint32 w, uint32 h, uint32 bpp )
{
	// Tiled surfaces use a hardware-defined pitch; height pads to 32 rows
	uint32 const uiTilePitch = cellGcmGetTiledPitchSize( w * bpp );
	uint32 const uiPaddedRows = AlignValue( h, 32 );
	// Bytes for all nCount surfaces, aligned for the tiled color pools
	return AlignValue( uiTilePitch * uiPaddedRows * nCount, PS3GCMALLOCATIONALIGN( kAllocPs3gcmColorBufferMisc ) );
}
// One-time initialization of all RSX local memory pools.
//
// Sets up the shared memory-free fence label, carves local memory into the
// per-pool regions described by s_PoolMemoryLayout (carved from the top of
// local memory downwards; the default pool receives everything that remains),
// configures the main-memory mapped pool, and binds the preset tiled regions
// for the framebuffer color pools.
void Ps3gcmLocalMemoryAllocator_Init()
{
	PS3ALLOCMTX
	if ( !CPs3gcmLocalMemoryAllocator::sm_puiFenceLocation )
	{
		// RSX writes increasing fence values to this label as queued frees
		// become reclaimable (see CPs3gcmLocalMemoryAllocator::Free)
		CPs3gcmLocalMemoryAllocator::sm_puiFenceLocation = cellGcmGetLabelAddress( GCM_LABEL_MEMORY_FREE );
		*CPs3gcmLocalMemoryAllocator::sm_puiFenceLocation = 0;
	}
	// Pool boundaries
	uint32 uiGcmAllocBegin = g_ps3gcmGlobalState.m_nLocalBaseOffset;
	uint32 uiGcmAllocEnd = uiGcmAllocBegin + g_ps3gcmGlobalState.m_nLocalSize;
	// Memory should be allocated for large frame buffers
	uint32 uiMemorySizeBuffer[2] = { MAX( 1280, g_ps3gcmGlobalState.m_nRenderSize[0] ), MAX( 720, g_ps3gcmGlobalState.m_nRenderSize[1] ) };
	uint32 uiFactor[2] = { uiMemorySizeBuffer[0]*uiMemorySizeBuffer[1], 1280*720 };
	// Configuration of pool memory (can be #ifdef'd for every game)
	static const uint32 s_PoolMemoryLayout[/*kGcmAllocPoolCount*/] =
	{
#if defined( CSTRIKE15 )
	// mhansen - We had to adjust the memory values a bit for cstrike15 to get a map to load
	// PS3_BUILDFIX - We need to revisit this to determine the proper size later on
	/*kGcmAllocPoolDefault = */ 0,
	/*kGcmAllocPoolDynamicNewPath = */ 6 * 1024 * 1024, // 6 MB
	/*kGcmAllocPoolDynamic = */ 11 * 1024 * 1024, // 11 MB
	/*kGcmAllocPoolTiledColorFB = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2 + CPs3gcmDisplay::SURFACE_COUNT, uiMemorySizeBuffer[0], uiMemorySizeBuffer[1], 4 ),
	// 2 buffers allocated in CreateRSXBuffers + 2 _rt_fullFrameFB - can probably get this down if...
	// 1. we clean up the post-pro rendering to use the front buffer as a texture, and
	// 2. tidy up aliasing for rt_fullframeFB and rt_fullFrameFB1
	/*kGcmAllocPoolTiledColorFBQ = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2, uiMemorySizeBuffer[0]/4, uiMemorySizeBuffer[1]/4, 4 ), // fits 2 1/4 size framebuffer textures
	/*kGcmAllocPoolTiledColor512 = */ 0,
	/*kGcmAllocPoolTiledColorMisc = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 1, 640, 640, 4 ) // RTT shadows ?
	+ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2, 1024, 512, 4) // Water
	+ Ps3gcmHelper_ComputeTiledAreaMemorySize(1, 32, 32, 4), // Eye Glint
	/*kGcmAllocPoolTiledD24S8 = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 1, 640*2, 640*2, 2)
	+ Ps3gcmHelper_ComputeTiledAreaMemorySize(1, 640, 640, 2) // CSM and Flashlight
	+ Ps3gcmHelper_ComputeTiledAreaMemorySize( 1, uiMemorySizeBuffer[0], uiMemorySizeBuffer[1], 4 ), // Main depth buffer
	/*kGcmAllocPoolMainMemory = */ 0, // configured based on mapped IO memory
	/*kGcmAllocPoolMallocMemory = */ 0, // using malloc
#else
	/*kGcmAllocPoolDefault = */ 0,
	/*kGcmAllocPoolDynamicNewPath = */ 5 * 1024 * 1024, // 5 MB
	/*kGcmAllocPoolDynamic = */ 10 * 1024 * 1024, // 10 MB
	/*kGcmAllocPoolTiledColorFB = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2 * CPs3gcmDisplay::SURFACE_COUNT, uiMemorySizeBuffer[0], uiMemorySizeBuffer[1], 4 ), // fits 6 of full framebuffer textures
	/*kGcmAllocPoolTiledColorFBQ = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 4, uiMemorySizeBuffer[0]/4, uiMemorySizeBuffer[1]/4, 4 ), // fits 4 quarters of framebuffer textures
	/*kGcmAllocPoolTiledColor512 = */ Ps3gcmHelper_ComputeTiledAreaMemorySize( 2, 512, 512, 4 ), // fits 2 512x512 RGBA textures
	/*kGcmAllocPoolTiledColorMisc = */ 5 * 1024 * 1024, // 5 MB
	/*kGcmAllocPoolTiledD24S8 = */ uint64( 15 * 1024 * 1024 ) * uiFactor[0]/uiFactor[1], // 15 MB, scaled by render resolution vs 1280x720
	/*kGcmAllocPoolMainMemory = */ 0, // configured based on mapped IO memory
	/*kGcmAllocPoolMallocMemory = */ 0, // using malloc
#endif
	};
	COMPILE_TIME_ASSERT( ARRAYSIZE( s_PoolMemoryLayout ) == ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ) );
	// Carve the explicitly-sized pools from the top of local memory downwards
	for ( int j = ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ); j -- > 0; )
	{
	const uint32 uiSize = AlignValue( s_PoolMemoryLayout[j], 1024 * 1024 ); // Align it on 1 MB boundaries, all our pools are large
	g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMax = uiGcmAllocEnd;
	uiGcmAllocEnd -= uiSize;
	g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMin =
	g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetUnallocated = uiGcmAllocEnd;
	}
	// Default pool setup (rest of local memory)
	g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetMax = uiGcmAllocEnd;
	g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetMin =
	g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetUnallocated = uiGcmAllocBegin + HARDWARE_CURSOR_SIZE;
	// Main memory mapped pool
	g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ].m_nOffsetMin =
	g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ].m_nOffsetUnallocated = uint32( g_ps3gcmGlobalState.m_pRsxMainMemoryPoolBuffer ) + g_ps3gcmGlobalState.m_nIoOffsetDelta;
	g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ].m_nOffsetMax = g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ].m_nOffsetMin + g_ps3gcmGlobalState.m_nRsxMainMemoryPoolBufferSize;
	// Store initial capacity for memory stats tracking:
	g_RsxMemoryStats.nGPUMemSize = g_ps3gcmGlobalState.m_nLocalSize;
	g_RsxMemoryStats_Pool.nDefaultPoolSize = g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ].m_nOffsetMin;
	//
	// Setup preset tiled regions
	//
	{
	// Full-size framebuffer color targets: tile region 0, bank 0
	CPs3gcmAllocationPool_t ePool = kGcmAllocPoolTiledColorFB;
	uint8 uiBank = 0; // bank 0..3
	uint32 nRenderPitch = cellGcmGetTiledPitchSize( g_ps3gcmGlobalState.m_nRenderSize[0] * 4 );
	uint8 uiTileIndex = ePool - kGcmAllocPoolTiledColorFB;
	cellGcmSetTileInfo( uiTileIndex, CELL_GCM_LOCATION_LOCAL,
	g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
	g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
	nRenderPitch, CELL_GCM_COMPMODE_DISABLED,
	( g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolTiledColorFB ].m_nOffsetMin ) / 0x10000, // The area base + size/0x10000 will be allocated as the tag area.
	uiBank );
	cellGcmBindTile( uiTileIndex );
	}
	{
	// Quarter-size framebuffer color targets: tile region 1, bank 1
	CPs3gcmAllocationPool_t ePool = kGcmAllocPoolTiledColorFBQ;
	uint8 uiBank = 1; // bank 0..3
	uint32 nRenderPitch = cellGcmGetTiledPitchSize( g_ps3gcmGlobalState.m_nRenderSize[0] * 4 / 4 );
	uint8 uiTileIndex = ePool - kGcmAllocPoolTiledColorFB;
	cellGcmSetTileInfo( uiTileIndex, CELL_GCM_LOCATION_LOCAL,
	g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
	g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
	nRenderPitch, CELL_GCM_COMPMODE_DISABLED,
	( g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolTiledColorFB ].m_nOffsetMin ) / 0x10000, // The area base + size/0x10000 will be allocated as the tag area.
	uiBank );
	cellGcmBindTile( uiTileIndex );
	}
	{
	// 512x512 color targets: tile region 2, bank 2
	CPs3gcmAllocationPool_t ePool = kGcmAllocPoolTiledColor512;
	uint8 uiBank = 2; // bank 0..3
	uint32 nRenderPitch = cellGcmGetTiledPitchSize( 512 * 4 );
	uint8 uiTileIndex = ePool - kGcmAllocPoolTiledColorFB;
	cellGcmSetTileInfo( uiTileIndex, CELL_GCM_LOCATION_LOCAL,
	g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
	g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin,
	nRenderPitch, CELL_GCM_COMPMODE_DISABLED,
	( g_ps3gcmLocalMemoryAllocator[ ePool ].m_nOffsetMin - g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolTiledColorFB ].m_nOffsetMin ) / 0x10000, // The area base + size/0x10000 will be allocated as the tag area.
	uiBank );
	cellGcmBindTile( uiTileIndex );
	}
#ifndef _CERT
	// Dev builds: log the final pool layout
	static const char * s_PoolMemoryNames[] =
	{
	/*kGcmAllocPoolDefault = */ "Default Pool",
	/*kGcmAllocPoolDynamicNewPath = */ "Dynamic New ",
	/*kGcmAllocPoolDynamic = */ "Dynamic IBVB",
	/*kGcmAllocPoolTiledColorFB = */ "FullFrameRTs",
	/*kGcmAllocPoolTiledColorFBQ = */ "1/4Frame RTs",
	/*kGcmAllocPoolTiledColor512 = */ "512x512 RTs ",
	/*kGcmAllocPoolTiledColorMisc = */ "All Misc RTs",
	/*kGcmAllocPoolTiledD24S8 = */ "DepthStencil",
	/*kGcmAllocPoolMainMemory = */ "Main Memory ",
	/*kGcmAllocPoolMallocMemory = */ "MallocMemory",
	};
	COMPILE_TIME_ASSERT( ARRAYSIZE( s_PoolMemoryNames ) == ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ) );
	Msg( "RSX Local Memory layout:\n" );
	for ( int j = 0; j < ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ); ++ j )
	{
	Msg( " %s 0x%08X - 0x%08X [ %9.3f MB ]\n",
	s_PoolMemoryNames[j],
	g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMin,
	g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMax,
	(g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[ j ].m_nOffsetMin) / 1024.f / 1024.f );
	}
	Msg( "Total size: %d MB\n", g_ps3gcmGlobalState.m_nLocalSize / 1024 / 1024 );
#endif
}
void Ps3gcmLocalMemoryAllocator_Reclaim()
{
PS3ALLOCMTX
for ( int k = 0; k < ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ); ++ k )
g_ps3gcmLocalMemoryAllocator[ k ].Reclaim();
}
// Compacts every local memory pool: stalls until the RSX has finished all
// queued work, slides live allocations down over freed holes (queuing
// RSX-side copies of block contents), then waits for those transfers to
// complete.  Extremely expensive -- intended for level transitions etc.
void Ps3gcmLocalMemoryAllocator_Compact()
{
	PS3ALLOCMTX
#define PS3GCMCOMPACTPROFILE 0
#if PS3GCMCOMPACTPROFILE
	float flTimeStart = Plat_FloatTime();
	uint32 uiFree = g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated;
#endif
	// Let PPU wait for all RSX commands done (include waitFlip)
	// Flush GPU right up to current point - Endframe call does this...
	gpGcmDrawState->EndFrame();
	gpGcmDrawState->CmdBufferFinish();
#if PS3GCMCOMPACTPROFILE
	float flTimeWait = Plat_FloatTime() - flTimeStart;
#endif
	{
	// Locking out memory mgmt for the whole of the compact before this
	// PS3ALLOCMTX
	for ( int k = 0; k < ARRAYSIZE( g_ps3gcmLocalMemoryAllocator ); ++ k )
	{
	g_ps3gcmLocalMemoryAllocator[ k ].Compact();
	}
	}
#if PS3GCMCOMPACTPROFILE
	float flTimePrepareTransfer = Plat_FloatTime() - flTimeStart;
#endif
	// Wait for all RSX memory to be transferred
	gpGcmDrawState->EndFrame();
	gpGcmDrawState->CmdBufferFinish();
#if PS3GCMCOMPACTPROFILE
	float flTimeDone = Plat_FloatTime() - flTimeStart;
	char chBuffer[64];
	Q_snprintf( chBuffer, ARRAYSIZE( chBuffer ), "COMPACT: %0.3f / %0.3f / %0.3f sec\n",
	flTimeWait, flTimePrepareTransfer, flTimeDone );
	uint32 dummy;
	sys_tty_write( SYS_TTYP6, chBuffer, Q_strlen( chBuffer ), &dummy );
	Q_snprintf( chBuffer, ARRAYSIZE( chBuffer ), "COMPACT: %0.3f -> %0.3f MB (%0.3f MB free)\n",
	uiFree / 1024.f / 1024.f, g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated / 1024.f / 1024.f,
	(g_ps3gcmLocalMemoryAllocator[0].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated) / 1024.f / 1024.f );
	sys_tty_write( SYS_TTYP6, chBuffer, Q_strlen( chBuffer ), &dummy );
#endif
}
void Ps3gcmLocalMemoryAllocator_CompactWithReason( char const *szReason )
{
double flTimeCompactStart = Plat_FloatTime();
DevMsg( "====== GCM LOCAL MEMORY COMPACT : %s =====\n", szReason );
uint32 uiFreeMemoryBeforeCompact = g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated;
DevMsg( "RSX Local Memory Free: %0.3f MB; compacting...\n", (g_ps3gcmLocalMemoryAllocator[0].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated) / 1024.f / 1024.f );
Ps3gcmLocalMemoryAllocator_Compact();
DevMsg( "RSX Local Memory Compacted %0.3f MB in %0.3f sec\n",
(uiFreeMemoryBeforeCompact - g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated) / 1024.f / 1024.f,
Plat_FloatTime() - flTimeCompactStart );
DevMsg( "RSX Local Memory Free: %0.3f MB\n", (g_ps3gcmLocalMemoryAllocator[0].m_nOffsetMax - g_ps3gcmLocalMemoryAllocator[0].m_nOffsetUnallocated) / 1024.f / 1024.f );
}
bool CPs3gcmLocalMemoryBlock::Alloc()
{
PS3ALLOCMTX
return g_ps3gcmLocalMemoryAllocator[PS3GCMALLOCATIONPOOL(m_uType)].Alloc( reinterpret_cast< CPs3gcmLocalMemoryBlockMutable * >( this ) );
}
void CPs3gcmLocalMemoryBlock::Free()
{
PS3ALLOCMTX
g_ps3gcmLocalMemoryAllocator[PS3GCMALLOCATIONPOOL(m_uType)].Free( reinterpret_cast< CPs3gcmLocalMemoryBlockMutable * >( this ) );
}
//////////////////////////////////////////////////////////////////////////
//
// Private implementation of PS3 local memory allocator
//
// Allocates local memory for pBlock from this pool.
//
// Strategy: (1) reuse a reclaimed free block of suitable size/alignment;
// (2) otherwise bump the pool's unallocated edge.  When the pool is full it
// first stalls, waiting for the RSX to release pending free blocks, then
// compacts the pool once, and finally raises a fatal Error -- except for the
// main-memory pool, which returns false so the caller can fall back to a
// local-memory pool.  The malloc-backed pool simply heap-allocates.
// Returns true on success.
inline bool CPs3gcmLocalMemoryAllocator::Alloc( CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock )
{
	TrackAllocStats( pBlock->MutableType(), pBlock->MutableSize() );
	uint32 uAlignBytes = PS3GCMALLOCATIONALIGN( pBlock->MutableType() );
	Assert( IsPowerOfTwo( uAlignBytes ) );
	double flAllocatorStallTime = 0.0f; // non-zero once we have started stalling on the RSX
	bool bCompactPerformed = false;
#ifdef GCMLOCALMEMORYBLOCKDEBUG
	// r_ps3_gcmlowcompact 0 pretends a compact already happened, disabling the low-memory compact path
	bCompactPerformed = !r_ps3_gcmlowcompact.GetBool();
#endif
retry_allocation:
	// Try to find a free block
	if ( LocalMemoryAllocation_t *pFreeBlock = FindFreeBlock( uAlignBytes, pBlock->MutableSize() ) )
	{
	// Take over the free block's offset and its slot in the allocation table
	pBlock->MutableOffset() = pFreeBlock->m_block.MutableOffset();
	pBlock->MutableIndex() = pFreeBlock->m_block.MutableIndex();
#ifdef GCMLOCALMEMORYBLOCKDEBUG
	if ( m_arrAllocations[ pBlock->MutableIndex() ] != &pFreeBlock->m_block )
	Error( "<vitaliy> GCM Local Memory Allocator Error (attempt to reuse invalid free block)!" );
#endif
	m_arrAllocations[ pBlock->MutableIndex() ] = reinterpret_cast< CPs3gcmLocalMemoryBlockMutable * >( pBlock );
	delete pFreeBlock;
	}
	else if ( this != &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMallocMemory ] )
	{
	// Allocate new block
	// NOTE(review): uiOldUnallocatedEdge appears unused in this function
	uint32 uiOldUnallocatedEdge = m_nOffsetUnallocated;
	uint32 uiFreeBlock = ( m_nOffsetUnallocated + uAlignBytes - 1 ) & ~( uAlignBytes - 1 );
	// Check if there's enough space in this pool for the requested block
	if ( uiFreeBlock + pBlock->MutableSize() > m_nOffsetMax )
	{
	// There's not enough space in this pool
	if ( m_pPendingFreeBlock )
	{
	// There are pending free blocks, we just need to wait for
	// RSX to finish rendering using them
	if ( !flAllocatorStallTime )
	{
	flAllocatorStallTime = Plat_FloatTime();
	// Flush GPU right up to current point - Endframe call does this...
	gpGcmDrawState->EndFrame();
	gpGcmDrawState->CmdBufferFlush();
	}
	// Spin until a big-enough block is reclaimed or nothing is left pending
	while ( Reclaim() < pBlock->MutableSize() && m_pPendingFreeBlock )
	{
	ThreadSleep( 1 );
	}
	goto retry_allocation;
	}
	else if ( !bCompactPerformed )
	{
	// Main-memory pool never compacts; let the caller fall back
	if (this == &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ]) return false;
	// Let PPU wait for all RSX commands done
	gpGcmDrawState->EndFrame();
	gpGcmDrawState->CmdBufferFinish();
	uint32 uiFragmentedFreeSpace = m_nOffsetMax - m_nOffsetUnallocated;
	for ( LocalMemoryAllocation_t *pFreeFragment = m_pFreeBlock; pFreeFragment; pFreeFragment = pFreeFragment->m_pNext )
	uiFragmentedFreeSpace += pFreeFragment->m_block.MutableSize();
	Warning(
	"**************** GCM LOCAL MEMORY LOW *****************\n"
	"<vitaliy> GCM Local Memory Allocator#%d pool compacting!\n"
	" Requested allocation %u bytes.\n"
	" Pool capacity %u bytes.\n"
	" Free fragmented space %u bytes.\n"
	" Unallocated %u bytes.\n"
	" Used %u bytes.\n",
	this - g_ps3gcmLocalMemoryAllocator,
	( uint32 ) pBlock->MutableSize(),
	m_nOffsetMax - m_nOffsetMin,
	uiFragmentedFreeSpace,
	m_nOffsetMax - m_nOffsetUnallocated,
	m_nOffsetUnallocated - m_nOffsetMin
	);
	Compact();
	Warning( " ---> Compacted pool#%d has %u unallocated bytes.\n",
	this - g_ps3gcmLocalMemoryAllocator,
	m_nOffsetMax - m_nOffsetUnallocated );
	bCompactPerformed = true;
	// Wait for all RSX memory to be transferred
	gpGcmDrawState->EndFrame();
	gpGcmDrawState->CmdBufferFinish();
	goto retry_allocation;
	}
	else
	{
	// Main memory pool returns failure so caller can try local pool.
	if (this == &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMainMemory ]) return false;
	uint32 uiFragmentedFreeSpace = m_nOffsetMax - m_nOffsetUnallocated;
	for ( LocalMemoryAllocation_t *pFreeFragment = m_pFreeBlock; pFreeFragment; pFreeFragment = pFreeFragment->m_pNext )
	uiFragmentedFreeSpace += pFreeFragment->m_block.MutableSize();
	// Fatal: even after stall + compact the pool cannot fit the request
	Error(
	"********* OUT OF GCM LOCAL MEMORY ********************\n"
	"<vitaliy> GCM Local Memory Allocator#%d pool exhausted!\n"
	" Failed allocation %u bytes.\n"
	" Pool capacity %u bytes.\n"
	" Free fragmented space %u bytes.\n"
	" Unallocated %u bytes.\n"
	" Used %u bytes.\n",
	this - g_ps3gcmLocalMemoryAllocator,
	( uint32 ) pBlock->MutableSize(),
	m_nOffsetMax - m_nOffsetMin,
	uiFragmentedFreeSpace,
	m_nOffsetMax - m_nOffsetUnallocated,
	m_nOffsetUnallocated - m_nOffsetMin
	);
	}
	}
	// update the pointer to "unallocated" realm
	m_nOffsetUnallocated = uiFreeBlock + pBlock->MutableSize();
	// this is the last allocation so far
	pBlock->MutableIndex() = m_arrAllocations.AddToTail( reinterpret_cast< CPs3gcmLocalMemoryBlockMutable * >( pBlock ) );
	pBlock->MutableOffset() = uiFreeBlock;
	}
	else
	{
	// Malloc-backed pool: Offset() stores the heap address directly
	MEM_ALLOC_CREDIT_( "GCM Malloc Pool" );
	void *pvMallocMemory = MemAlloc_AllocAligned( pBlock->MutableSize(), uAlignBytes );
	pBlock->MutableOffset() = (uint32) pvMallocMemory;
	pBlock->MutableIndex() = ~0;
	}
	// 7LTODO if ( flAllocatorStallTime )
	// g_ps3gcmGlobalState.m_flAllocatorStallTimeWaitingRSX += Plat_FloatTime() - flAllocatorStallTime;
#ifdef GCMLOCALMEMORYBLOCKDEBUG
	// PS3 doesn't allow more than 8 zcull regions (index 0..7)
	if ( g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledD24S8].m_arrAllocations.Count() > 8 )
	Error( "PS3 number of zcull regions exceeded!\n" );
	// PS3 doesn't allow more than 15 tiles regions (index 0..14)
	if ( g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledD24S8].m_arrAllocations.Count() +
	g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorMisc].m_arrAllocations.Count() +
	( kGcmAllocPoolTiledColorMisc - kGcmAllocPoolTiledColorFB )
	> 15 )
	Error( "PS3 number of tiled regions exceeded!\n" );
	pBlock->m_dbgGuardCookie = g_GcmLocalMemoryBlockDebugCookieAllocated;
#endif
	return true;
}
// Queues pBlock for deallocation.  The memory cannot be reused immediately
// because the RSX may still be reading it: a copy of the block's tracking
// data goes onto the pending-free chain tagged with a fence number, and a
// write-back label command is queued so Reclaim() can tell when the GPU has
// passed this point in the command stream.
inline void CPs3gcmLocalMemoryAllocator::Free( CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock )
{
#ifdef GCMLOCALMEMORYBLOCKDEBUG
	if ( !pBlock ||
	pBlock->m_dbgGuardCookie != g_GcmLocalMemoryBlockDebugCookieAllocated ||
	( ( pBlock->MutableIndex() != ~0 ) && ( m_arrAllocations[ pBlock->MutableIndex() ] != pBlock ) ) )
	{
	//DebuggerBreak();
	Error( "<vitaliy> Attempt to free not allocated GCM local memory block!" );
	}
	pBlock->m_dbgGuardCookie = g_GcmLocalMemoryBlockDebugCookieFree;
#endif
	// Tracking copy of the block lives on the pending-free chain from now on
	LocalMemoryAllocation_t *pDealloc = new LocalMemoryAllocation_t;
	pDealloc->m_block = *pBlock;
	pDealloc->m_uiFenceNumber = sm_uiFenceNumber;
	sm_uiFenceNumber ++;
	if(!sm_uiFenceNumber)sm_uiFenceNumber = 1;	// skip 0 on wrap-around (label starts at 0)
	pDealloc->m_pNext = m_pPendingFreeBlock;
	gpGcmDrawState->SetWriteBackEndLabel(GCM_LABEL_MEMORY_FREE, sm_uiFenceNumber);
	m_pPendingFreeBlock = pDealloc;
	TrackAllocStats( pBlock->MutableType(), - pBlock->MutableSize() );
	if ( pBlock->MutableIndex() != ~0 )	// index ~0 == malloc-backed block, not in the table
	{
#ifdef GCMLOCALMEMORYBLOCKDEBUG
	if ( m_arrAllocations[ pBlock->MutableIndex() ] != pBlock )
	Error( "<vitaliy> GCM Local Memory Allocator Error (freeing block that is not properly registered)!" );
#endif
	// Point the allocation table at the pending-free copy
	m_arrAllocations[ pBlock->MutableIndex() ] = &pDealloc->m_block;
	}
#ifdef GCMLOCALMEMORYBLOCKDEBUG
	pBlock->MutableOffset() = ~0;
	pBlock->MutableIndex() = ~0;
#endif
}
// Returns true when the RSX has passed the stored fence value
// uiCheckStoredFenceValue, given the most recently read label value
// uiCurrentFenceValue.  Both values are rebased against m_uiFenceLastKnown
// with unsigned subtraction so the comparison survives the 32-bit fence
// counter wrapping around.
inline bool CPs3gcmLocalMemoryAllocator::IsFenceCompleted( uint32 uiCurrentFenceValue, uint32 uiCheckStoredFenceValue )
{
#if GCM_ALLOW_NULL_FLIPS
	extern bool g_ps3_nullflips;
	if ( g_ps3_nullflips )
		return true;	// null flips: GPU work is discarded, everything counts as done
#endif
	// Needs to handle the counter wrapping around
	return ( ( uiCurrentFenceValue - m_uiFenceLastKnown ) >= ( uiCheckStoredFenceValue - m_uiFenceLastKnown ) );
}
// Moves pending-free blocks whose fence the RSX has passed onto the reusable
// free chain (or heap-frees them for the malloc-backed pool).  Each
// reclaimed block's usable size is recomputed as the gap up to the next live
// allocation (or the unallocated edge), absorbing alignment padding.
// bForce skips the fence check -- caller must guarantee the RSX is idle.
// Returns the size of the largest block reclaimed this call (0 when none).
inline uint32 CPs3gcmLocalMemoryAllocator::Reclaim( bool bForce )
{
	uint32 uiLargestBlockSizeReclaimed = 0;
	uint32 uiCurrentFenceValue = *sm_puiFenceLocation;
	// Walk pending free blocks and see if they are no longer
	// in use by RSX:
	LocalMemoryAllocation_t **p = &m_pPendingFreeBlock;
	if ( !bForce ) while ( (*p) && !IsFenceCompleted( uiCurrentFenceValue, (*p)->m_uiFenceNumber ) )
	p = &( (*p)->m_pNext );
	// Now p is pointing to the chain of free blocks
	// chain that has been completed (due to the nature of
	// pushing new deallocation at the head of the pending
	// list)
	if ( *p )
	{
	LocalMemoryAllocation_t *pCompletedChain = *p;
	*p = NULL; // Terminate the chain
	// Handle the special case of malloc reclaim - free all memory
	if ( this == &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolMallocMemory ] )
	{
	MEM_ALLOC_CREDIT_( "GCM Malloc Pool" );
	for ( LocalMemoryAllocation_t *pActualFree = pCompletedChain; pActualFree; )
	{
	MemAlloc_FreeAligned( pActualFree->m_block.DataInMallocMemory() );
	LocalMemoryAllocation_t *pDelete = pActualFree;
	pActualFree = pActualFree->m_pNext;
	delete pDelete;
	}
	pCompletedChain = NULL;
	}
	// Relink the completed pending chain into
	// the free blocks chain
	LocalMemoryAllocation_t **ppFree = &m_pFreeBlock;
	while ( *ppFree )
	ppFree = &( (*ppFree)->m_pNext );
	*ppFree = pCompletedChain;
	// Recompute actual free sizes of the completed chain
	// Actual free size is the delta between block offset and next block offset
	// When there's no next block then its delta between block offset and unallocated edge
	for ( LocalMemoryAllocation_t *pActualFree = pCompletedChain; pActualFree; pActualFree = pActualFree->m_pNext )
	{
	uint32 uiIdx = pActualFree->m_block.MutableIndex() + 1;
	uint32 uiNextOffset = m_nOffsetUnallocated;
	if ( uiIdx < m_arrAllocations.Count() )
	{
	CPs3gcmLocalMemoryBlockMutable * RESTRICT pNextBlock = m_arrAllocations[ uiIdx ];
	uiNextOffset = pNextBlock->Offset();
	}
	uint32 uiActualBlockSize = uiNextOffset - pActualFree->m_block.Offset();
	pActualFree->m_block.MutableSize() = uiActualBlockSize;
	uiLargestBlockSizeReclaimed = MAX( uiLargestBlockSizeReclaimed, uiActualBlockSize );
	}
	}
	// Remember the last known fence value
	m_uiFenceLastKnown = uiCurrentFenceValue;
#ifdef GCMLOCALMEMORYBLOCKDEBUG
	ValidateAllBlocks();
#endif
	return uiLargestBlockSizeReclaimed;
}
// Searches the free-block chain for the best-fit block that can service a
// request of uiSize bytes at uiAlignBytes alignment.  Blocks more than 10%
// larger than the request are rejected to limit internal fragmentation.
// The chosen block is unlinked from the free chain and returned;
// NULL when no suitable block exists.
inline CPs3gcmLocalMemoryAllocator::LocalMemoryAllocation_t * CPs3gcmLocalMemoryAllocator::FindFreeBlock( uint32 uiAlignBytes, uint32 uiSize )
{
	uint32 const uiMaxAcceptableSize = uiSize * 11/10; // we don't want to inflate requested size by > 10%
	LocalMemoryAllocation_t **ppCandidate = NULL;
	for ( LocalMemoryAllocation_t **ppLink = &m_pFreeBlock; (*ppLink); ppLink = &( (*ppLink)->m_pNext ) )
	{
		uint32 const uiBlockSize = (*ppLink)->m_block.MutableSize();
		bool const bSizeAcceptable = ( uiBlockSize >= uiSize ) && ( uiBlockSize <= uiMaxAcceptableSize );
		bool const bAligned = ( ( (*ppLink)->m_block.Offset() & ( uiAlignBytes - 1 ) ) == 0 );
		if ( !bSizeAcceptable || !bAligned )
			continue;
		// Best fit: keep the smallest acceptable block; <= keeps the later
		// of equal-size blocks (matches original selection order)
		if ( !ppCandidate || ( uiBlockSize <= (*ppCandidate)->m_block.MutableSize() ) )
			ppCandidate = ppLink;
	}
	if ( !ppCandidate )
		return NULL;
	// Unlink the chosen block and hand it to the caller
	LocalMemoryAllocation_t *pResult = (*ppCandidate);
	(*ppCandidate) = pResult->m_pNext;
	pResult->m_pNext = NULL;
	return pResult;
}
// Adjusts the per-pool GPU memory statistics by nDelta bytes for the pool
// backing the given allocation type.  Returns true when the pool lives in
// RSX local memory, false when it is backed by main (system) memory.
inline bool TrackAllocStats_Pool( CPs3gcmAllocationType_t uAllocType, int nDelta )
{
	int *pnPoolCounter;
	bool bResidesInRSXMemory = true;
	switch ( PS3GCMALLOCATIONPOOL( uAllocType ) )
	{
	case kGcmAllocPoolDefault:
		pnPoolCounter = &g_RsxMemoryStats_Pool.nDefaultPoolUsed;
		break;
	case kGcmAllocPoolDynamicNewPath:
	case kGcmAllocPoolDynamic:
		pnPoolCounter = &g_RsxMemoryStats_Pool.nDynamicPoolUsed;
		break;
	case kGcmAllocPoolTiledColorFB:
	case kGcmAllocPoolTiledColorFBQ:
	case kGcmAllocPoolTiledColor512:
	case kGcmAllocPoolTiledColorMisc:
	case kGcmAllocPoolTiledD24S8:
		pnPoolCounter = &g_RsxMemoryStats_Pool.nRTPoolUsed;
		break;
	case kGcmAllocPoolMainMemory: // Unused, unless PS3GCM_VBIB_IN_IO_MEMORY set to 1
	case kGcmAllocPoolMallocMemory:
		pnPoolCounter = &g_RsxMemoryStats_Pool.nMainMemUsed;
		bResidesInRSXMemory = false; // In main memory!
		break;
	default:
		pnPoolCounter = &g_RsxMemoryStats_Pool.nUnknownPoolUsed;
		break;
	}
	*pnPoolCounter += nDelta;
	Assert( 0 <= (int)*pnPoolCounter );
	// Report free memory only from the default pool (the other pools are pre-sized to fixed limits, and all
	// geom/textures go into the default pool, so that's where content-driven variation/failures will occur)
	g_RsxMemoryStats.nGPUMemFree = g_RsxMemoryStats_Pool.nDefaultPoolSize - g_RsxMemoryStats_Pool.nDefaultPoolUsed;
	return bResidesInRSXMemory;
}
// Adjusts the global RSX memory usage statistics (g_RsxMemoryStats) by
// nDelta bytes for the given allocation type.  Compiled to a no-op unless
// TRACK_ALLOC_STATS is enabled.
inline void CPs3gcmLocalMemoryAllocator::TrackAllocStats( CPs3gcmAllocationType_t uAllocType, int nDelta )
{
#if TRACK_ALLOC_STATS
	// Per-pool stats first; early-out for allocations not in RSX memory
	if ( !TrackAllocStats_Pool( uAllocType, nDelta ) )
		return;
	unsigned int *pnCounter;
	switch ( uAllocType )
	{
	case kAllocPs3gcmColorBufferMisc:
	case kAllocPs3gcmColorBufferFB:
	case kAllocPs3gcmColorBufferFBQ:
	case kAllocPs3gcmColorBuffer512:
	case kAllocPs3gcmDepthBuffer:
		pnCounter = &g_RsxMemoryStats.nRTSize;
		break;
	case kAllocPs3gcmTextureData:
	case kAllocPs3gcmTextureData0:
		pnCounter = &g_RsxMemoryStats.nTextureSize;
		break;
	case kAllocPs3GcmVertexBuffer:
		pnCounter = &g_RsxMemoryStats.nVBSize;
		break;
	case kAllocPs3GcmIndexBuffer:
		pnCounter = &g_RsxMemoryStats.nIBSize;
		break;
	default:
		// Shaders, EDGE geom buffers and dynamic pools are treated as misc
		// unless they become big/variable
		pnCounter = &g_RsxMemoryStats.nUnknown;
		break;
	}
	*pnCounter += nDelta;
	Assert( 0 <= (int)*pnCounter );
#endif // TRACK_ALLOC_STATS
}
#ifdef GCMLOCALMEMORYBLOCKDEBUG
#define VALIDATECONDITION( x ) if( !( x ) ) { Error( "<vitaliy> GCM Local Memory Allocation block %p index %d is corrupt [line %d]!\n", pBlock, k, __LINE__ ); }
// Debug-only consistency check: verifies guard cookies, index back-pointers,
// non-overlapping ascending offsets for all tracked allocations, and that
// every allocation marked free appears on exactly one free chain.
// (The locals k/pBlock exist because VALIDATECONDITION's error message
// references them by name.)
inline void CPs3gcmLocalMemoryAllocator::ValidateAllBlocks()
{
	// Traverse the allocated list and validate debug guards and patch-back indices
	CUtlVector< uint32 > arrFreeBlocksIdx;
	uint32 uiLastAllocatedOffset = m_nOffsetMin;
	for ( int k = 0, kEnd = m_arrAllocations.Count(); k < kEnd; ++ k )
	{
	CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock = m_arrAllocations[k];
	VALIDATECONDITION( pBlock );
	VALIDATECONDITION( pBlock->m_dbgGuardCookie == g_GcmLocalMemoryBlockDebugCookieAllocated || pBlock->m_dbgGuardCookie == g_GcmLocalMemoryBlockDebugCookieFree );
	VALIDATECONDITION( pBlock->MutableIndex() < m_arrAllocations.Count() );
	VALIDATECONDITION( pBlock->MutableIndex() == k );
	VALIDATECONDITION( m_arrAllocations[ pBlock->MutableIndex() ] == pBlock );
	VALIDATECONDITION( pBlock->Offset() >= uiLastAllocatedOffset );
	uiLastAllocatedOffset = pBlock->Offset() + pBlock->MutableSize();
	VALIDATECONDITION( uiLastAllocatedOffset <= m_nOffsetMax );
	if ( pBlock->m_dbgGuardCookie == g_GcmLocalMemoryBlockDebugCookieFree )
	arrFreeBlocksIdx.AddToTail( k );
	}
	// Traverse free lists and validate
	LocalMemoryAllocation_t * arrFree[] = { m_pPendingFreeBlock, m_pFreeBlock };
	for ( int j = 0; j < ARRAYSIZE( arrFree ); ++ j )
	for ( LocalMemoryAllocation_t *p = arrFree[j]; p; p = p->m_pNext )
	{
	int k = j;
	CPs3gcmLocalMemoryBlockMutable * RESTRICT pBlock = &p->m_block;
	VALIDATECONDITION( pBlock );
	VALIDATECONDITION( pBlock->m_dbgGuardCookie == g_GcmLocalMemoryBlockDebugCookieFree );
	k = pBlock->MutableIndex();
	if ( pBlock->MutableIndex() != ~0 )
	{
	VALIDATECONDITION( pBlock->MutableIndex() < m_arrAllocations.Count() );
	VALIDATECONDITION( m_arrAllocations[ pBlock->MutableIndex() ] == pBlock );
	VALIDATECONDITION( arrFreeBlocksIdx.FindAndFastRemove( pBlock->MutableIndex() ) );
	}
	}
	// Every free-marked allocation must have been found on a free chain
	int k = 0;
	void *pBlock = 0;
	VALIDATECONDITION( !arrFreeBlocksIdx.Count() );
}
#endif
// Compacts this pool in place: reclaims all freed blocks, drops the free
// chains, then re-allocates every live block tightly from the bottom of the
// pool, queuing RSX transfer commands to copy each moved block's contents
// from its old offset to its new one.  Caller MUST have stalled both the PPU
// command stream and the RSX first (see Ps3gcmLocalMemoryAllocator_Compact).
inline void CPs3gcmLocalMemoryAllocator::Compact()
{
#ifdef GCMLOCALMEMORYBLOCKDEBUG
	ValidateAllBlocks();
	if ( r_ps3_gcmnocompact.GetBool() )
	return;
#endif
	// Reclaim all memory (NOTE: all pending blocks must be reclaimed since both RSX and PPU have stopped rendering!)
	Reclaim();
#ifdef GCMLOCALMEMORYBLOCKDEBUG
	if ( m_pPendingFreeBlock )
	Warning( "GCM Local Memory Allocator Compact forces pending free blocks to be reclaimed.\n" );
	ValidateAllBlocks();
#endif
	// Force-reclaim anything still pending (safe: RSX is idle at this point)
	if ( m_pPendingFreeBlock )
	Reclaim( true );
#ifdef GCMLOCALMEMORYBLOCKDEBUG
	if ( m_pPendingFreeBlock )
	Error( "<vitaliy> GCM Local Memory Allocator Compact requires RSX and PPU rendering to be paused! (pending free blocks have not been reclaimed)\n" );
	ValidateAllBlocks();
#endif
	// Walk the free blocks chain and patch-back NULL pointers into allocation tracking system
	while ( m_pFreeBlock )
	{
	LocalMemoryAllocation_t *p = m_pFreeBlock;
	m_pFreeBlock = p->m_pNext;
	m_arrAllocations[ p->m_block.MutableIndex() ] = NULL;
	delete p;
	}
	Assert( !m_pFreeBlock && !m_pPendingFreeBlock );
	// These are elements requiring reallocation
	uint32 uiCount = m_arrAllocations.Count();
	CPs3gcmLocalMemoryBlockMutable **pReallocationBlocks = m_arrAllocations.Base();
	// Here "correct" implementation would be to copy off m_arrAllocations vector onto stack for iteration,
	// RemoveAll from m_arrAllocations vector and allocate all blocks again.
	// We will cheat since we know that we will allocate same number of elements and directly write zero
	// into m_arrAllocations m_Size member, then we will still be able to use the memory of the vector
	// for reading blocks requiring compact reallocation, and AddToTail will still fill the vector with
	// correct data.
	struct AllocatorCompactVectorCheat : public CUtlVector< CPs3gcmLocalMemoryBlockMutable * > { inline void ResetCountPreservingMemoryContents() { m_Size = 0; } };
	( ( AllocatorCompactVectorCheat * ) ( char * ) &m_arrAllocations )->ResetCountPreservingMemoryContents();
	m_nOffsetUnallocated = m_nOffsetMin;
	// Prepare RSX for data buffer transfers in local memory
	uint nTransferMode = ( ( this - &g_ps3gcmLocalMemoryAllocator[ kGcmAllocPoolDefault ] ) < kGcmAllocPoolMainMemory ) ? CELL_GCM_TRANSFER_LOCAL_TO_LOCAL : CELL_GCM_TRANSFER_MAIN_TO_MAIN;
	Assert( nTransferMode < 4 );
	// Reallocate all blocks
	for ( ; uiCount; -- uiCount, ++ pReallocationBlocks )
	{
	CPs3gcmLocalMemoryBlockMutable *pBlock = *pReallocationBlocks;
	if ( !pBlock )
	continue;	// slot belonged to a freed block
	uint32 nOldOffset = pBlock->Offset();
	char* pOldAddress = pBlock->DataInAnyMemory();
	TrackAllocStats( pBlock->MutableType(), - pBlock->MutableSize() );
	// Re-allocate: with the pool reset this packs the block at the lowest fitting offset
	Alloc( pBlock );
	if ( nOldOffset == pBlock->Offset() )
	continue;	// block didn't move; no copy needed
	// Have RSX transfer blocks data. RSX may hang if there's WriteLabel between the Format and Offset commands,
	// so reserve space for both of them up front
	// SpuDrawTransfer_t * pTransfer = g_spuGcm.GetDrawQueue()->AllocWithHeader<SpuDrawTransfer_t>( SPUDRAWQUEUE_TRANSFER_METHOD | nTransferMode );
	// pTransfer->m_nLineSize = pBlock->MutableSize();
	// pTransfer->m_nOldOffset = nOldOffset;
	// pTransfer->m_nNewOffset = pBlock->Offset();
	// 7LTODO
	uint32 uiLineSize = pBlock->MutableSize();
	uint32 uiLineOffset = 0;
	const uint nMaxTransferSize = 0x3FFFFF;	// hardware limit per transfer line
	cellGcmReserveMethodSizeInline(gpGcmContext, 0x4000/4);
	GCM_FUNC( cellGcmSetTransferDataMode, nTransferMode );
	int i = 1;
	do
	{
	// Split the copy into chunks of at most nMaxTransferSize bytes
	uint32 uiTransferSize = Min<uint32>( uiLineSize, nMaxTransferSize );
	GCM_FUNC( cellGcmSetTransferDataFormat, 0, 0, uiTransferSize, 1, 1, 1 );
	GCM_FUNC( cellGcmSetTransferDataOffset, pBlock->Offset() + uiLineOffset, nOldOffset + uiLineOffset );
	uiLineSize -= uiTransferSize;
	uiLineOffset += uiTransferSize;
	i++;
	}
	while ( uiLineSize > 0 );
	// V_memmove(pBlock->DataInAnyMemory(), pOldAddress, pBlock->MutableSize() );
	}
#ifdef GCMLOCALMEMORYBLOCKDEBUG
	ValidateAllBlocks();
#endif
}
//////////////////////////////////////////////////////////////////////////
//
// Computation of tiled memory
//
uint32 CPs3gcmLocalMemoryBlock::TiledMemoryTagAreaBase() const
{
	// The tag area is addressed in 64Kb (0x10000) granules. Color tiles are
	// laid out relative to the first preset tiled pool (the frame-buffer
	// color pool); depth tiles hang off the end of the 0..0x7FF tag range.
	uint32 nColorTagOrigin = g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorFB].m_nOffsetMin;
	CPs3gcmAllocationPool_t ePool = PS3GCMALLOCATIONPOOL( m_uType );
	switch ( ePool )
	{
	case kGcmAllocPoolTiledColorMisc:
		// Misc color tiles are placed at the front of tag area after preset pools
		return ( Offset() - nColorTagOrigin ) / 0x10000;
	case kGcmAllocPoolTiledD24S8:
		// Depth tiles are placed in the end of tag area (0-0x7FF is offset range)
		return 0x800 - ( Offset() - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledD24S8].m_nOffsetMin + m_uiSize ) / 0x10000;
	case kGcmAllocPoolTiledColorFB:
		// FB color tiles go first (the FB pool base IS the tag origin, so this is always 0)
		return ( g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorFB].m_nOffsetMin - nColorTagOrigin ) / 0x10000;
	case kGcmAllocPoolTiledColorFBQ:
		// FBQ color tiles go next
		return ( g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColorFBQ].m_nOffsetMin - nColorTagOrigin ) / 0x10000;
	case kGcmAllocPoolTiledColor512:
		// 512 color tiles go next
		return ( g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledColor512].m_nOffsetMin - nColorTagOrigin ) / 0x10000;
	default:
		break;
	}
#ifdef GCMLOCALMEMORYBLOCKDEBUG
	Error( "<vitaliy> Cannot compute tiled memory tag base from a non-tiled-pool allocation!\n" );
#endif
	return ~0;
}
uint32 CPs3gcmLocalMemoryBlock::TiledMemoryIndex() const
{
	// Maps the allocation's pool to a hardware tile slot index.
	CPs3gcmAllocationPool_t ePool = PS3GCMALLOCATIONPOOL( m_uType );
	switch ( ePool )
	{
	case kGcmAllocPoolTiledColorMisc:
		// Misc color tiles follow the preset tiled color pools
		return m_uiIndex + kGcmAllocPoolTiledColorMisc - kGcmAllocPoolTiledColorFB;
	case kGcmAllocPoolTiledD24S8:
		// Depth tiles occupy the last tile slots, growing down from 14
		return 14 - m_uiIndex;
	default:
		// Preset color pools map 1:1 onto the first tile slots
		return ePool - kGcmAllocPoolTiledColorFB;
	}
}
uint32 CPs3gcmLocalMemoryBlock::ZcullMemoryIndex() const
{
	// Only depth (D24S8) tiles are backed by zcull; their tracking index is
	// also their zcull index.
	if ( PS3GCMALLOCATIONPOOL( m_uType ) == kGcmAllocPoolTiledD24S8 )
		return m_uiIndex;
#ifdef GCMLOCALMEMORYBLOCKDEBUG
	Error( "<vitaliy> Cannot compute zcull index from a non-zcull allocation!\n" );
#endif
	return ~0;
}
uint32 CPs3gcmLocalMemoryBlock::ZcullMemoryStart() const
{
	// Zcull start is measured in pixels (1 byte per pixel of zcull storage).
	// D24S8 is 4 bytes per pixel, hence the divide; the result is implicitly
	// 4096-aligned because the block offset is 64Kb-aligned.
	if ( PS3GCMALLOCATIONPOOL( m_uType ) == kGcmAllocPoolTiledD24S8 )
		return ( Offset() - g_ps3gcmLocalMemoryAllocator[kGcmAllocPoolTiledD24S8].m_nOffsetMin ) / 4;
#ifdef GCMLOCALMEMORYBLOCKDEBUG
	Error( "<vitaliy> Cannot compute zcull memory start from a non-zcull allocation!\n" );
#endif
	return ~0;
}
//////////////////////////////////////////////////////////////////////////
//
// Allow shaderapi to query GPU memory stats:
//
// Copies the current RSX memory usage counters into the caller-provided
// struct; this is the query entry point used by shaderapi.
void GetGPUMemoryStats( GPUMemoryStats &stats )
{
	stats = g_RsxMemoryStats;
}

View File

@@ -0,0 +1,167 @@
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
// Local memory manager
//
//==================================================================================================
#ifndef INCLUDED_PS3GCMMEMORY_H
#define INCLUDED_PS3GCMMEMORY_H
#ifndef SPU
#include "tier1/strtools.h"
#include "shaderapi/gpumemorystats.h"
#include "cell/gcm.h"
#include "gcmconfig.h"
#else
#endif
//--------------------------------------------------------------------------------------------------
// Externals
//--------------------------------------------------------------------------------------------------
#ifndef SPU
extern void GetGPUMemoryStats( GPUMemoryStats &stats );
extern void Ps3gcmLocalMemoryAllocator_Init();
#endif
//--------------------------------------------------------------------------------------------------
// Memory Pools, Types and LocalMemoryBlock
//--------------------------------------------------------------------------------------------------
// Memory pools for GPU allocations. NOTE: the ordering is significant —
// the pool index is packed into the top 4 bits of an allocation type (see
// PS3GCMALLOCATIONPOOL), pools before kGcmAllocPoolMainMemory live in RSX
// local memory, and pools before kGcmAllocPoolMallocMemory are RSX-mapped
// (see CPs3gcmLocalMemoryBlock::IsLocalMemory / IsRsxMappedMemory).
enum CPs3gcmAllocationPool_t
{
	kGcmAllocPoolDefault,
	kGcmAllocPoolDynamicNewPath,
	kGcmAllocPoolDynamic,
	kGcmAllocPoolTiledColorFB, // Frame-buffer tiled color memory (should be first preset tiled region)
	kGcmAllocPoolTiledColorFBQ, // Quarter-frame-buffer tiled color memory
	kGcmAllocPoolTiledColor512, // 512x512 tiled color memory
	kGcmAllocPoolTiledColorMisc, // Last tiled color region
	kGcmAllocPoolTiledD24S8, // Tiled depth/stencil region
	kGcmAllocPoolMainMemory, // Pool in the main RSX-mapped IO memory
	kGcmAllocPoolMallocMemory, // Pool in malloc-backed non-RSX-mapped memory
	kGcmAllocPoolCount
};
// An allocation type packs three fields into a uint32:
//   bits  0..23 : required alignment in bytes
//   bits 24..27 : type id within the pool
//   bits 28..31 : CPs3gcmAllocationPool_t pool index
#define PS3GCMALLOCATIONPOOL( uType ) ( (CPs3gcmAllocationPool_t)( ( ((uint32)(uType)) >> 28 ) & 0xF ) )
#define PS3GCMALLOCATIONALIGN( uType ) ( ((uint32)(uType)) & 0xFFFFFF )
// NOTE: the whole expansion is parenthesized (the original was not) so the
// macro composes safely with any surrounding operator at the call site.
#define PS3GCMALLOCATIONTYPE( uAlign, ePool, iType ) ( (((uint32)(uAlign))&0xFFFFFF) | ( (((uint32)(iType))&0xF) << 24 ) | ( (((uint32)(ePool))&0xF) << 28 ) )
// Concrete allocation types: each constant packs (alignment, pool, type id)
// via PS3GCMALLOCATIONTYPE; the pool determines where the data lives.
enum CPs3gcmAllocationType_t
{
	// Default pool
	kAllocPs3gcmTextureData0 = PS3GCMALLOCATIONTYPE( 128, kGcmAllocPoolMainMemory, 0 ),
	kAllocPs3gcmTextureData = PS3GCMALLOCATIONTYPE( 128, kGcmAllocPoolDefault, 1 ),
	kAllocPs3GcmVertexBuffer = PS3GCMALLOCATIONTYPE( 32, kGcmAllocPoolDefault, 2 ),
	kAllocPs3GcmIndexBuffer = PS3GCMALLOCATIONTYPE( 32, kGcmAllocPoolDefault, 3 ),
	kAllocPs3GcmShader = PS3GCMALLOCATIONTYPE( 128, kGcmAllocPoolDefault, 4 ),
	kAllocPs3GcmEdgeGeomBuffer = PS3GCMALLOCATIONTYPE( 128, kGcmAllocPoolDefault, 5 ),
	// Dynamic pool
	kAllocPs3GcmVertexBufferDynamic = PS3GCMALLOCATIONTYPE( 32, kGcmAllocPoolDynamic, 1 ),
	kAllocPs3GcmIndexBufferDynamic = PS3GCMALLOCATIONTYPE( 32, kGcmAllocPoolDynamic, 2 ),
	kAllocPs3GcmDynamicBufferPool = PS3GCMALLOCATIONTYPE( 32, kGcmAllocPoolDynamicNewPath, 1 ),
	// Malloc memory pool
	kAllocPs3GcmVertexBufferDma = PS3GCMALLOCATIONTYPE( 32, kGcmAllocPoolMallocMemory, 1 ),
	kAllocPs3GcmIndexBufferDma = PS3GCMALLOCATIONTYPE( 32, kGcmAllocPoolMallocMemory, 2 ),
	// Tiled pools (color/depth render targets; 64Kb alignment for tile regions)
	kAllocPs3gcmColorBufferFB = PS3GCMALLOCATIONTYPE( 64, kGcmAllocPoolTiledColorFB, 1 ),
	kAllocPs3gcmColorBufferFBQ = PS3GCMALLOCATIONTYPE( 64, kGcmAllocPoolTiledColorFBQ, 1 ),
	kAllocPs3gcmColorBuffer512 = PS3GCMALLOCATIONTYPE( 64, kGcmAllocPoolTiledColor512, 1 ),
	kAllocPs3gcmColorBufferMisc = PS3GCMALLOCATIONTYPE( 64*1024, kGcmAllocPoolTiledColorMisc,1 ),
	kAllocPs3gcmDepthBuffer = PS3GCMALLOCATIONTYPE( 64*1024, kGcmAllocPoolTiledD24S8, 1 ),
};
struct CPs3gcmLocalMemoryBlockSystemGlobal;
// Handle to a single allocation in GPU-visible memory (RSX local memory,
// RSX-mapped IO memory, or malloc-backed main memory). Stores the RSX
// offset rather than a pointer; use the DataIn*Memory() accessors to get an
// effective address. Copying is restricted because the compaction/patch-back
// mechanism must be able to track every live block.
struct ALIGN16 CPs3gcmLocalMemoryBlock
{
public:
	CPs3gcmLocalMemoryBlock() {}
#if 0
#define GCMLOCALMEMORYBLOCKDEBUG
	uint64 m_dbgGuardCookie; // Debug cookie used to guard when calling code let block go out of scope without freeing it
#endif
protected:
	uint32 m_nLocalMemoryOffset; // Offset in RSX local memory
	uint32 m_uiSize; // Actual allocation size, might be larger than requested allocation size
	CPs3gcmAllocationType_t m_uType; // Allocation type with required alignment
	uint32 m_uiIndex; // Index of the allocation in allocation tracking system
	bool Alloc(); // Internal implementation of Local Memory Allocator
	// Prevent copying (since patch-back mechanism needs to access the allocated blocks)
	CPs3gcmLocalMemoryBlock( CPs3gcmLocalMemoryBlock const &x ) { V_memcpy( this, &x, sizeof( CPs3gcmLocalMemoryBlock ) ); }
	CPs3gcmLocalMemoryBlock& operator =( CPs3gcmLocalMemoryBlock const &x ) { V_memcpy( this, &x, sizeof( CPs3gcmLocalMemoryBlock ) ); return *this; }
public:
	// Bitwise copy from the system-global variant (normal copying is private)
	inline void Assign( CPs3gcmLocalMemoryBlockSystemGlobal const &x ) { V_memcpy( this, &x, sizeof( CPs3gcmLocalMemoryBlock ) ); }
	inline bool Alloc( CPs3gcmAllocationType_t uType, uint32 uiSize ) { m_uType = uType; m_uiSize = uiSize; return Alloc(); }
	// Wraps memory not owned by the allocator; m_uiIndex = ~0 marks the block as untracked
	inline void AttachToExternalMemory( CPs3gcmAllocationType_t uType, uint32 nOffset, uint32 uiSize ) { m_uType = uType; m_uiSize = uiSize; m_nLocalMemoryOffset = nOffset; m_uiIndex = ~0; }
	void Free();
	void FreeAndAllocNew() { Free(); Alloc(); }
	inline uint32 Offset() const { return m_nLocalMemoryOffset; }
	inline uint32 Size() const { return m_uiSize; }
	// Pool ordering encodes the memory kind (see CPs3gcmAllocationPool_t)
	inline bool IsLocalMemory() const { return PS3GCMALLOCATIONPOOL( m_uType ) < kGcmAllocPoolMainMemory; }
	inline bool IsRsxMappedMemory() const { return PS3GCMALLOCATIONPOOL( m_uType ) < kGcmAllocPoolMallocMemory; }
	inline uint8 GcmMemoryLocation() const { return IsLocalMemory() ? CELL_GCM_LOCATION_LOCAL : CELL_GCM_LOCATION_MAIN; }
#ifndef SPU
	char * DataInLocalMemory() const;
	char * DataInMainMemory() const;
	char * DataInMallocMemory() const;
	char * DataInAnyMemory() const;
#endif
	// Tiled memory access
	uint32 TiledMemoryTagAreaBase() const;
	uint32 TiledMemoryIndex() const;
	// Zcull memory access
	uint32 ZcullMemoryIndex() const;
	uint32 ZcullMemoryStart() const;
} ALIGN16_POST;
// System-global flavor of a local memory block: identical layout, but
// copying is fully disabled (declared, never defined), since the patch-back
// mechanism must be able to reach every live block by address.
struct CPs3gcmLocalMemoryBlockSystemGlobal : public CPs3gcmLocalMemoryBlock
{
public:
	CPs3gcmLocalMemoryBlockSystemGlobal() {}
private:
	// Prevent copying (since patch-back mechanism needs to access the allocated blocks)
	CPs3gcmLocalMemoryBlockSystemGlobal( CPs3gcmLocalMemoryBlock const &x );
	CPs3gcmLocalMemoryBlockSystemGlobal& operator =( CPs3gcmLocalMemoryBlockSystemGlobal const &x );
};
//--------------------------------------------------------------------------------------------------
// Buffer (used by IB and VBs)
//--------------------------------------------------------------------------------------------------
// GPU buffer handle used by index and vertex buffers: a thin wrapper over a
// local memory block, created/destroyed through New()/Release() on PPU.
struct CPs3gcmBuffer
{
	CPs3gcmLocalMemoryBlock m_lmBlock;
public:
	inline uint32 Offset() { return m_lmBlock.Offset(); }
public:
#ifndef SPU
	static CPs3gcmBuffer * New( uint32 uiSize, CPs3gcmAllocationType_t uType );
	void Release();
#endif
};
#endif // INCLUDED_PS3GCMMEMORY_H

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,205 @@
//========== Copyright (c) 2010, Valve Corporation, All rights reserved. ========
// Global GCM-related state
//
#ifndef _PS3GCMSTATE_H_INC_
#define _PS3GCMSTATE_H_INC_
#include "ps3/ps3gcmmemory.h"
#include <cell/gcm.h>
#include "bitmap/imageformat.h"
#include "ps3/ps3_gcm_shared.h"
// Global GCM state: owns the RSX IO/local memory mappings, command-buffer
// bookkeeping, render-target dimensions and the display surface chain.
class CPs3gcmGlobalState
{
public:
	void * m_pIoAddress; // RSX IO buffer, base address
	uint32 m_nIoSize; // RSX IO total size [including CMD buffer]
	uint32 m_nIoSizeNotPreallocated; // the io total size that wasn't pre-allocated in initialization
	uint32 m_nCmdSize; // RSX CMD buffer total size [including first reserved 4K]
	uint32 const volatile *m_pCurrentCmdBufferSegmentRSX; // Begin offset of current CMD buffer segment being processed by RSX
#if GCM_CTX_UNSAFE_MODE
	uint32 *m_pCurrentCmdBufferUnflushedBeginRSX; // Marks beginning of not yet flushed RSX buffer
#endif
	void * m_pLocalBaseAddress; // RSX Local Memory Base Address
	uint32 m_nLocalBaseOffset; // cellGcmAddressToOffset( m_pLocalBaseAddress )
	uint32 m_nLocalSize; // RSX Local Memory Size
	uint16 m_nRenderSize[2]; // width & height of the render buffer
	float m_flRenderAspect; // aspect ratio of the output device
	uint32 m_nIoOffsetDelta; // add this to EA to get Io Offset
	uint32 m_nSurfaceRenderPitch; // pitch in bytes of the render surfaces
	// this is used to allocate permanent cmd buffers; to be cleared when level reloads, hopefully won't need anything more complicated than that
	// but if we do, we can make a page-chain-based (page from 128 bytes) allocator with reference count per page
	// NOTE: the buffer MUST have 1KB padding in the end to prevent overfetch RSX crash!
	CellGcmContextData m_cmdBufferPermContext;
	// vertex and index data buffer
	void * m_pRsxDataTransferBuffer;
	uint32 m_nRsxDataTransferBufferSize;
	// main memory pool buffer
	void * m_pRsxMainMemoryPoolBuffer;
	uint32 m_nRsxMainMemoryPoolBufferSize;
	// special texture to support debug stripes
	CPs3gcmLocalMemoryBlock m_debugStripeImageBuffer;
	uint32 m_nCmdBufferRefCount; // how many buffers are referenced?
	CPs3gcmDisplay m_display; // m_display objects that are created automatically
	CPs3gcmLocalMemoryBlock m_pShaderPsEmptyBuffer;
	CgBinaryProgram *m_pShaderPsEmpty; // empty pixel shader
	uint32 m_nIoLocalOffsetEmptyFragmentProgramSetupRoutine;
	uint32 m_nFlushCounter;
	float m_flAllocatorStallTimeWaitingRSX; // how long allocator ended up waiting for RSX
public:
	int32 Init();
	void Shutdown();
	void DrawDebugStripe( uint nScreenX, uint nScreenY, uint nStripeY, uint nStripeWidth, uint nStripeHeight, int nNext = 0 );
	// pre-allocate memory before command buffer is allocated
	void * IoMemoryPrealloc( uint nAlign, uint nSize );
	void * IoSlackAlloc( uint nAlign, uint nSize );
	void IoSlackFree( void * eaMemory );
	bool IsIoMemory( void * eaMemory );
	uintp CmdBufferToIoOffset( void *pCmdBuffer );
	// Reference-counted access to the shared permanent command context
	CellGcmContextData* CmdBufferAlloc( );
	void CmdBufferFreeOffset( uint32 );
	enum CmdBufferFlushType_t
	{
		kFlushForcefully,
		kFlushEndFrame
	};
	void CmdBufferFlush( CmdBufferFlushType_t eFlushType );
	void CmdBufferFinish();
	void CmdBufferReservationCallback( struct CellGcmContextData *context );
	uint32 GetRsxControlNextReferenceValue();
	// Note:
	// Height alignment must be 32 for tiled surfaces on RSX
	// 128 for Edge Post MLAA
	// 64 for Edge Post MLAA with EDGE_POST_MLAA_MODE_TRANSPOSE_64 flag set
	uint GetRenderSurfaceBytes( uint nHeightAlignment = 32 ) const { return m_nSurfaceRenderPitch * AlignValue( m_nRenderSize[1], nHeightAlignment ); }
protected:
	void CreateDebugStripeTextureBuffer();
	void CreateEmptyPixelShader();
	void CreateRsxBuffers();
	void CreateIoBuffers();
	int InitVideo();
	int InitGcm();
};
// Converts a command-buffer EA into an RSX IO offset, asserting the pointer
// lies either in the permanent context or in the main (SYSring) CMD buffer.
inline uintp CPs3gcmGlobalState::CmdBufferToIoOffset( void *pCmdBuffer )
{
	uintp eaCmd = uintp( pCmdBuffer );
	uintp nIoOffset = eaCmd + m_nIoOffsetDelta;
	bool bInPermContext = eaCmd >= uintp( m_cmdBufferPermContext.begin ) && eaCmd < uintp( m_cmdBufferPermContext.end );
	bool bInMainCmdBuffer = nIoOffset >= 4096 && nIoOffset <= m_nCmdSize;
	Assert( bInPermContext || bInMainCmdBuffer );
	return nIoOffset;
}
// Hands out the shared permanent command context, tracking outstanding
// references so CmdBufferFreeOffset() knows when it may be rewound.
inline CellGcmContextData* CPs3gcmGlobalState::CmdBufferAlloc( )
{
	++m_nCmdBufferRefCount;
	return &m_cmdBufferPermContext;
}
// Drops one reference; when the last one goes away the permanent context is
// rewound so its space can be reused.
inline void CPs3gcmGlobalState::CmdBufferFreeOffset( uint32 )
{
	--m_nCmdBufferRefCount;
	if ( m_nCmdBufferRefCount == 0 )
	{
		m_cmdBufferPermContext.current = m_cmdBufferPermContext.begin;
	}
}
extern CPs3gcmGlobalState g_ps3gcmGlobalState;
//////////////////////////////////////////////////////////////////////////
//
// inline implementations of PPU-only stuff
//
// EA of the block in RSX local memory:
// base address + (block offset relative to the base offset).
inline char * CPs3gcmLocalMemoryBlock::DataInLocalMemory() const
{
	Assert( IsLocalMemory() );
	char *pLocalBase = ( char * ) g_ps3gcmGlobalState.m_pLocalBaseAddress;
	return pLocalBase + ( m_nLocalMemoryOffset - g_ps3gcmGlobalState.m_nLocalBaseOffset );
}
// EA of the block in RSX-mapped IO (main) memory: the stored offset is
// relative to the IO base address.
inline char * CPs3gcmLocalMemoryBlock::DataInMainMemory() const
{
	Assert( !IsLocalMemory() && IsRsxMappedMemory() );
	char *pIoBase = ( char * ) g_ps3gcmGlobalState.m_pIoAddress;
	return pIoBase + m_nLocalMemoryOffset;
}
// Malloc-backed blocks store the effective address directly in the offset
// field, so the "offset" is simply reinterpreted as a pointer.
inline char * CPs3gcmLocalMemoryBlock::DataInMallocMemory() const
{
	Assert( !IsLocalMemory() && !IsRsxMappedMemory() );
	return ( char * ) m_nLocalMemoryOffset;
}
inline char * CPs3gcmLocalMemoryBlock::DataInAnyMemory() const
{
switch ( PS3GCMALLOCATIONPOOL( m_uType ) )
{
default: return DataInLocalMemory();
case kGcmAllocPoolMainMemory: return DataInMainMemory();
case kGcmAllocPoolMallocMemory: return DataInMallocMemory();
}
}
// Allow shaderapi to query GPU memory stats:
extern void GetGPUMemoryStats( GPUMemoryStats &stats );
class CmdSubBuffer: public CellGcmContextData
{
public:
static int32_t DoNothing( struct CellGcmContextData *pContext, uint32_t nWords )
{
Error( "CmdSubBuffer callback @%p: trying to allocate %u words\n", pContext, nWords );
return CELL_ERROR_ERROR_FLAG;
}
CmdSubBuffer( uint32 * pBuffer, uint nAllocateWords )
{
this->current = this->begin = pBuffer;
this->end = this->begin + nAllocateWords;
this->callback = DoNothing;
}
~CmdSubBuffer()
{
Assert( this->current == this->end );
}
};
extern uint32 CalculateMemorySizeFromCmdLineParam( char const *pCmdParamName, uint32 nDefaultValue, uint32 nMinValue = 0 );
// Returns true when eaMemory points into the RSX-mapped IO range.
// NOTE(review): the upper bound uses <=, i.e. the one-past-end address is
// treated as inside the range — confirm this inclusiveness is intentional
// (callers passing end pointers would rely on it).
inline bool CPs3gcmGlobalState::IsIoMemory( void * eaMemory )
{
	return uintp( eaMemory ) >= uintp( m_pIoAddress ) && uintp( eaMemory ) <= uintp( m_pIoAddress ) + m_nIoSize;
}
#endif // _PS3GCMSTATE_H_INC_

View File

@@ -0,0 +1,650 @@
// Copyright (c) 2010, Valve Corporation, All rights reserved. ========
#include "tier0/platform.h"
#include "tier0/dbg.h"
#include "tier1/convar.h"
#include "ps3/ps3gcmlabels.h"
#include "ps3gcmstate.h"
#include "spugcm.h"
#include "rsxflip.h"
// Global flip handler instance and its tuning convars.
CFlipHandler g_flipHandler;
// Debug: percentage chance of dropping a queued RSX user command (stress test)
ConVar r_drop_user_commands( "r_drop_user_commands", "0" );
// MLAA mode bits requested for the next frame (masked by EDGE_POST_MLAA_FLAG_MASK)
ConVar r_ps3_mlaa( "r_ps3_mlaa", "1" ); //
ConVar r_ps3_vblank_miss_threshold( "r_ps3_vblank_miss_threshold", "0.08", FCVAR_DEVELOPMENTONLY, "How much % of vsync time is allowed after vblank for frames that missed vsync to tear and flip immediately" );
#if GCM_ALLOW_TIMESTAMPS
// Slot index for the next frame-begin timestamp; set to -1 once written (see BeginScene)
int32 g_ps3_timestampBeginIdx = GCM_REPORT_TIMESTAMP_FRAME_FIRST;
#endif
// Debug-only flip/interrupt event log (enable by switching #if 0 to #if 1):
// a small lock-free ring of fixed-size messages, each stamped with the PPU
// timebase in its last 4 bytes. Compiled out to no-ops in normal builds.
#if 0 // defined(_DEBUG)
char ALIGN16 g_flipLog[256][32] ALIGN16_POST;
uint g_flipLogIdx = 0;
// Atomically claims a slot, formats the message, zero-pads, stamps timebase
#define FLIP_LOG(MSG,...) \
{ \
uint nLogIdx = cellAtomicIncr32( &g_flipLogIdx ) & ( ARRAYSIZE( g_flipLog ) - 1 ); \
int nCount = V_snprintf( g_flipLog[nLogIdx], sizeof( g_flipLog[nLogIdx] ), MSG, ##__VA_ARGS__ ); \
int zeroSize = sizeof( g_flipLog[0] ) - 4 - nCount; \
V_memset( g_flipLog[nLogIdx] + nCount, 0, zeroSize ); \
*(uint32*)( g_flipLog[nLogIdx] + sizeof( g_flipLog[0] ) - 4 ) = __mftb(); \
}
#define ENABLE_FLIP_LOG 1
#define FlipAssert( X ) do{if(!(X))DebuggerBreak();}while(false)
uint g_flipUserCommands[1024][2];
#else
#define FLIP_LOG(MSG,...)
#define FlipAssert( X )
#define ENABLE_FLIP_LOG 0
#endif
// Kicks the Edge Post MLAA SPU workload on the frame image previously
// transferred to main memory; the result goes to dst, or to the intermediate
// main-memory output buffer when IsResultInMainMemory(). nSetLabel is the
// value the SPU writes to the RSX label when the tasks complete.
void CEdgePostWorkload::Kick( void * dst, uint nSetLabel )
{
	if( !m_isInitialized )
		return;
	extern ConVar r_ps3_mlaa;
	FLIP_LOG("mlaa %d,mode=%Xh,label=%d", nSetLabel, g_flipHandler.m_nMlaaFlagsThisFrame, *m_mlaaContext.rsxLabelAddress );
	// wait for any previous MLAA task to finish before reusing the context
	edgePostMlaaWait( &m_mlaaContext );
	FlipAssert( vec_all_eq( *g_spuGcm.m_pMlaaBufferCookie, g_vuSpuGcmCookie ) );
	//FLIP_LOG("mlaa init %d", nSetLabel );
	edgePostInitializeWorkload( &m_workload, m_stages, STAGE_COUNT );
	bool isMlaaRelativeEdgeDetection = true;
	uint8
	nMlaaThresholdBase (0x0a), // from Edge sample: these are pretty good threshold values, but you might find better ones...
	nMlaaThresholdFactor(0x59),
	nMlaaAbsoluteThreshold(0x20);
	uint nWidth = g_ps3gcmGlobalState.m_nRenderSize[0], nHeight = g_ps3gcmGlobalState.m_nRenderSize[1];
	FlipAssert( nWidth <= 1280 && nWidth >= 640 && nHeight <= 720 && nHeight >= 480 );
	//FLIP_LOG("mlaa prep %d", nSetLabel );
	edgePostMlaaPrepareWithRelativeThreshold( &m_mlaaContext, g_spuGcm.m_pMlaaBuffer, IsResultInMainMemory()? g_spuGcm.m_pMlaaBufferOut : dst,
		nWidth, nHeight,
		g_ps3gcmGlobalState.m_nSurfaceRenderPitch,
		isMlaaRelativeEdgeDetection?nMlaaThresholdBase:nMlaaAbsoluteThreshold,
		isMlaaRelativeEdgeDetection?nMlaaThresholdFactor:0,
		g_flipHandler.m_nMlaaFlagsThisFrame,
		nSetLabel );
	//FLIP_LOG("mlaa kick %d", nSetLabel );
	edgePostMlaaKickTasks( &m_mlaaContext );
	FLIP_LOG("mlaa kicked %d,label=%d", nSetLabel, *m_mlaaContext.rsxLabelAddress );
	FlipAssert( vec_all_eq( *g_spuGcm.m_pMlaaBufferCookie, g_vuSpuGcmCookie ) );
}
// Resets the ring to empty: producer and consumer cursors coincide.
void RsxInterruptFifo::Init()
{
	m_nGet = 0;
	m_nPut = 0;
}
// Convenience overload: packs cause + surface index into an event record
// and enqueues it. Returns the new put marker.
uint RsxInterruptFifo::Queue( uint8 nCause, uint8 nSurfaceFlipIdx )
{
	Event_t ev;
	ev.m_nCause = nCause;
	ev.m_nSurfaceFlipIdx = nSurfaceFlipIdx;
	return Queue( ev );
}
// Enqueues an event, spinning while the ring is full (which should never
// actually happen in practice). Returns the new put marker.
uint RsxInterruptFifo::Queue( const Event_t &event )
{
	while( ( m_nPut - m_nGet ) >= MAX_EVENT_COUNT - 1 )
	{
		sys_timer_usleep( 100 ); // this should NEVER happen
	}
#if ENABLE_FLIP_LOG
	switch( event.m_nCause )
	{
	case GCM_USERCMD_POSTPROCESS:
		FLIP_LOG( "queue:post %d", event.m_nSurfaceFlipIdx );
		break;
	case GCM_USERCMD_FLIPREADY:
		FLIP_LOG( "queue:flip %d sys%d", event.m_nSurfaceFlipIdx, g_flipHandler.m_nSystemFlipId[ event.m_nSurfaceFlipIdx ] );
		break;
	default:
		FLIP_LOG("Unknown event %d", event.m_nCause );
		break;
	}
#endif
	// Store first, then publish by advancing the put cursor
	m_queue[ m_nPut & ( MAX_EVENT_COUNT - 1 ) ] = event;
	return ++m_nPut; // Should be atomic if there are multiple event producer threads
}
// Snapshot of the producer cursor; pass to HasEvents() to know how many
// events were queued up to this point.
uint RsxInterruptFifo::GetPutMarker()const
{
	return m_nPut;
}
// Number of events still pending up to the given put marker; the unsigned
// subtraction keeps the count correct across counter wrap-around.
int RsxInterruptFifo::HasEvents( uint nMarker )
{
	uint nGet = m_nGet;
	int nPending = int( nMarker - nGet );
	Assert( nPending >= 0 );
	return nPending;
}
// Reference to the head event without consuming it. The caller must ensure
// the queue is non-empty.
RsxInterruptFifo::Event_t & RsxInterruptFifo::PeekEvent()
{
	uint nHead = m_nGet;
	Assert( nHead != m_nPut );
	return m_queue[ nHead & ( MAX_EVENT_COUNT - 1 ) ];
}
// Copies the head event out and advances the consumer cursor
// (the increment would need to be atomic with multiple consumers).
const RsxInterruptFifo::Event_t RsxInterruptFifo::DequeueEvent( )
{
	Event_t ev = PeekEvent();
	++m_nGet;
	return ev;
}
// Tells the RSX interrupt thread that all events up to the current put
// marker are ready: the label write request is pushed straight into the
// SPUGCM draw queue (the older GCM_FUNC-based path is kept commented out).
void RsxInterruptFifo::QueueRsxInterrupt()
{
	uint32 *pReplace = NULL;
#if ENABLE_FLIP_LOG
	//FLIP_LOG( "q%X", m_nPut );
	g_flipUserCommands[ m_nPut & ( ARRAYSIZE( g_flipUserCommands ) - 1 ) ][ 0 ] = m_nPut;
	pReplace = &g_flipUserCommands[ m_nPut & ( ARRAYSIZE( g_flipUserCommands ) - 1 ) ][ 1 ];
	*pReplace = uint32( gCellGcmCurrentContext->current );
#endif
	/*
	if( IsCert() // don't deliberately drop anything in CERT
	|| 0 == r_drop_user_commands.GetInt() // don't drop anything if drop==0
	|| ( ( rand() % 100 ) >= r_drop_user_commands.GetInt() ) // drop 1% means in 99% of cases we still want to SetUserCommand
	)
	GCM_FUNC( cellGcmSetUserCommand, m_nPut );
	GCM_FUNC( cellGcmSetWriteTextureLabel, GCM_LABEL_LAST_INTERRUPT_GET, m_nPut );
	*/
	// directly putting it to SPUGCM queue instead of routing it through GCM_FUNC
	g_spuGcm.GetDrawQueue()->Push3( SPUDRAWQUEUE_QUEUE_RSX_INTERRUPT_METHOD | GCM_LABEL_LAST_INTERRUPT_GET, m_nPut, ( uintp )pReplace );
}
// One-time setup: resets the event FIFO and flip bookkeeping, seeds the
// flip-ready events with a simulated initial flip history, clears the
// "last interrupt get" label, and installs the vblank + user handlers.
void CFlipHandler::Init()
{
	m_interruptFifo.Init();
	/*
	V_memset( m_nDebugStates, 0, sizeof( m_nDebugStates ) );
	m_nDebugStates[RENDERING_SURFACE] = -1;
	*/
	m_nFlipSurfaceIdx = 0;
	m_nFlipSurfaceCount = 0;
	m_nVblankCounter = 100; // how many vblanks since the last flip?
	m_bEdgePostResultAlreadyInLocalMemory = false;
	m_nMlaaFlagsThisFrame = 0; // disable MLAA before the first BeginScene() is called
	m_nMlaaFlagMaskNextFrame = ~0u;
	for( int i = 0; i < ARRAYSIZE( m_surfaceEdgePost ) ; ++i ) // initially, the post processing of surfaces is disabled
		m_surfaceEdgePost[i] = 0;
	// simulated initial state: we just flipped to surface 1, then 2, thus leaving surface 1 (then 0) available to render into
	// event[1] may not be set for MLAA mode because in order to start rendering into surface 0 (which we're rendering into), we "waited" for event 1
	for ( int j = 2; j < ARRAYSIZE( m_evFlipReady ); ++ j )
		m_evFlipReady[j].Set();
	//m_nLastFlippedSurfaceIdx = CPs3gcmDisplay::SURFACE_COUNT - 1 ;
	m_pLastInterruptGet = cellGcmGetLabelAddress( GCM_LABEL_LAST_INTERRUPT_GET );
	*m_pLastInterruptGet = 0;
	cellGcmSetVBlankHandler( INTERRUPT_VBlankHandler );
	cellGcmSetUserHandler( INTERRUPT_UserHandler );
}
// Detaches the vblank and user interrupt handlers installed by Init().
void CFlipHandler::Shutdown()
{
	cellGcmSetVBlankHandler( NULL );
	cellGcmSetUserHandler( NULL );
}
//////////////////////////////////////////////////////////////////////////
// 1. draw PS/3 system menus into the surface
// 2. queue a reliable "flip ready" event for GCM interrupt thread to process and flip surface to this
//
void CFlipHandler::QmsPrepareFlipSubmit( GcmUserCommandEnum_t nEvent, uint surfaceFlipIdx )
{
	// Queue the system flip and remember its id so the interrupt thread can
	// complete the actual flip when this event is drained from the FIFO
	uint32 nSystemFlipId = GCM_FUNC_NOINLINE( cellGcmSetPrepareFlip, surfaceFlipIdx );
	m_nSystemFlipId[surfaceFlipIdx] = nSystemFlipId;
	// The surface must not already be marked flip-ready
	Assert( !m_evFlipReady[ surfaceFlipIdx ].Check() );
	m_interruptFifo.Queue( nEvent, surfaceFlipIdx );
}
// Debug: when nonzero, toggles MLAA off periodically so its effect is visible
ConVar r_ps3_mlaa_pulse( "r_ps3_mlaa_pulse", "0" );
// Mask of MLAA mode bits accepted from the r_ps3_mlaa convar
enum EdgePostFlags_t {
	EDGE_POST_MLAA_FLAG_MASK = ( EDGE_POST_MLAA_MODE_ENABLED | EDGE_POST_MLAA_MODE_SHOW_EDGES | EDGE_POST_MLAA_MODE_SINGLE_SPU_TRANSPOSE | EDGE_POST_MLAA_MODE_TRANSPOSE_64 )
};
// Frame start: writes the frame-begin RSX timestamp (once per frame) and
// latches this frame's MLAA mode from the convar, subject to the per-frame
// mask and the debug pulse convar.
void CFlipHandler::BeginScene()
{
#if GCM_ALLOW_TIMESTAMPS
	if ( g_ps3_timestampBeginIdx >= 0 )
	{
		GCM_FUNC( cellGcmSetTimeStamp, g_ps3_timestampBeginIdx );
		g_ps3_timestampBeginIdx = -1;
	}
#endif
	m_nMlaaFlagsThisFrame = r_ps3_mlaa.GetInt() & EDGE_POST_MLAA_FLAG_MASK;
	if( int nPulse = r_ps3_mlaa_pulse.GetInt() )
	{
		// Debug pulsing: alternate MLAA on/off every nPulse frames
		if( 1 & ( g_spuGcm.m_nFrame / nPulse ) )
		{
			m_nMlaaFlagsThisFrame = 0; // disable for 16 frames = 1/2 second
		}
	}
	m_nMlaaFlagsThisFrame &= m_nMlaaFlagMaskNextFrame;
	//m_nMlaaFlagMaskNextFrame = (uint)-1;
}
// Ensures the Edge Post (MLAA) result for the given surface is back in local
// memory before RSX flips it: synchronizes RSX with the SPU workload (via a
// wait-label or a JTS call), then queues the main->local image transfer when
// the workload writes its output to main memory. Idempotent per frame via
// m_bEdgePostResultAlreadyInLocalMemory.
void CFlipHandler::TransferMlaaResultIfNecessary( uint nSurfacePrevFlipIdx )
{
	if( m_bEdgePostResultAlreadyInLocalMemory )
		return;
	if( g_edgePostWorkload.ShouldUseLabelForSynchronization() )
	{
		GCM_FUNC( cellGcmSetWaitLabel, GCM_LABEL_EDGEPOSTMLAA, nSurfacePrevFlipIdx );
	}
	else
	{
		// wait for SPU to finish post-processing previous surface
		uint32 *pPrevJts = &g_spuGcm.m_pEdgePostRsxLock[ nSurfacePrevFlipIdx ];
		if( *pPrevJts != CELL_GCM_RETURN() )
		{
			GCM_FUNC( cellGcmSetCallCommand, uintp( pPrevJts ) + g_spuGcmShared.m_nIoOffsetDelta );
		}
	}
	//
	// NOTE: we can start post-processing before SetPrepareFlip, it only makes sense since we don't always use interrupt to do so
	// if we ever do proper synchronization with SPU workload, we should kick Edge Post here, before SetPrepareFlip
	//
	if( g_edgePostWorkload.IsResultInMainMemory() )
	{
		CPs3gcmLocalMemoryBlockSystemGlobal & prevSurfaceColor = g_ps3gcmGlobalState.m_display.surfaceColor[nSurfacePrevFlipIdx];
		GCM_FUNC( cellGcmSetTransferImage, CELL_GCM_TRANSFER_MAIN_TO_LOCAL,
			prevSurfaceColor.Offset(), g_ps3gcmGlobalState.m_nSurfaceRenderPitch, 0, 0,
			uintp( g_spuGcm.m_pMlaaBufferOut ) + g_ps3gcmGlobalState.m_nIoOffsetDelta, g_ps3gcmGlobalState.m_nSurfaceRenderPitch, 0, 0,
			g_ps3gcmGlobalState.m_nRenderSize[0], g_ps3gcmGlobalState.m_nRenderSize[1],
			4 );
	}
	m_bEdgePostResultAlreadyInLocalMemory = true;
}
bool CFlipHandler::QmsAdviceBeforeDrawPrevFramebuffer()
{
uint nSurfacePrevFlipIdx = g_ps3gcmGlobalState.m_display.PrevSurfaceIndex( 1 );
uint8 prevPostProcessed = m_surfaceEdgePost[nSurfacePrevFlipIdx];
if( prevPostProcessed ) // did previous surface need post-processing?
{
// we'd actually be free to start MLAA here instead of in Flip, for the cost of one more RSX->PPU interrupt
// but we don't do that because we only may do so when the LAST player draws, and we don't know if this post processing
// that will now start is related to the LAST player
// we don't need to do that until flip if we're using deferred queue
// although if we're using deferred queue and we run out of space there, we stop using it, replay it and start defer-render into previous frame
TransferMlaaResultIfNecessary( nSurfacePrevFlipIdx );
// do the post-processing on this frame, in the mean time render into previous frame
return true;
}
return false; // there's no need to switch surfaces now
}
// End-of-frame flip. Queues Edge Post processing and/or the flip of the
// appropriate surface, notifies the RSX interrupt thread, flushes the
// command buffer, then blocks until the surface after next is free so the
// next frame can safely render into the next surface.
void CFlipHandler::Flip()
{
#if GCM_ALLOW_TIMESTAMPS
	OnFrameTimestampAvailableMST( 1.0f );
#endif
	extern ConVar mat_vsync;
	m_bVSync = mat_vsync.GetBool();
	g_ps3gcmGlobalState.CmdBufferFlush( CPs3gcmGlobalState::kFlushForcefully );
	g_spuGcm.GetDrawQueue()->Push1( SPUDRAWQUEUE_FRAMEEVENT_METHOD | SDQFE_END_FRAME );
	// current / next / after-next / previous surface indices in the flip chain
	uint surfaceFlipIdx = g_ps3gcmGlobalState.m_display.surfaceFlipIdx, nSurfaceNextFlipIdx = g_ps3gcmGlobalState.m_display.NextSurfaceIndex( 1 ), nSurfaceAfterNextFlipIdx = g_ps3gcmGlobalState.m_display.NextSurfaceIndex( 2 ), nSurfacePrevFlipIdx = g_ps3gcmGlobalState.m_display.PrevSurfaceIndex( 1 );
	/*
	uint nScreenWidth = g_ps3gcmGlobalState.m_nRenderSize[0];
	uint nScreenY = 40;
	g_ps3gcmGlobalState.DrawDebugStripe( nScreenWidth * surfaceFlipIdx / 3, nScreenY, 0, nScreenWidth / 3, 4 );
	g_ps3gcmGlobalState.DrawDebugStripe( ( g_spuGcm.m_nFrame & 0xF ) * ( nScreenWidth / 16 ), 34, 0, ( nScreenWidth / 16 ) * ( 1 + m_nFlipSurfaceCount ), 1 );
	*/
	// let interrupt know we're ready to post-process the new frame, and we wanna flip the previous frame
	//g_ps3gcmGlobalState.CmdBufferFinish();
	uint32 * pThisJts = g_spuGcm.m_pEdgePostRsxLock + surfaceFlipIdx; // may be NULL + idx
	Assert( !g_spuGcm.m_pEdgePostRsxLock || *pThisJts == CELL_GCM_RETURN() );
	uint8 prevPostProcessed = m_surfaceEdgePost[nSurfacePrevFlipIdx];
	uint8 thisPostProcess = g_spuGcm.m_pMlaaBuffer ? ( uint8 ) ( m_nMlaaFlagsThisFrame & EDGE_POST_MLAA_FLAG_MASK ): 0 ;
	if( prevPostProcessed ) // did previous surface need post-processing?
	{
		TransferMlaaResultIfNecessary( nSurfacePrevFlipIdx );
		//if( g_spuGcm.m_bUseDeferredDrawQueue )
		{
			// now is the time to execute all the deferred commands, if there are any
			// NOTE: this will often do nothing , because current frame would've flushed previous frame deferred commands already
			// right before starting writing its own
			g_spuGcm.ExecuteDeferredDrawQueue( 1 );
		}
		//g_ps3gcmGlobalState.DrawDebugStripe( nScreenWidth * surfaceFlipIdx / 3, 44, surfaceFlipIdx, nScreenWidth / 3, 2, -1 );
		// prepare flip of previous frame - Edge Post processed buffer
		// the previous frame was post-processed; we'll prepare flip on it.
		QmsPrepareFlipSubmit( GCM_USERCMD_FLIPREADY, nSurfacePrevFlipIdx );
	}
	else
	{
		// if previous frame wasn't post-processed, don't flip it because we don't want to flip the same framebuffer twice (although we probably could)
		// so we don't have anything to flip here, but have a frame to post-process
		g_spuGcm.ExecuteDeferredDrawQueue( 1 );
	}
	m_surfaceEdgePost[surfaceFlipIdx] = thisPostProcess; // is post-process required for this surface ?
	if( thisPostProcess )
	{
		if( !( m_nMlaaFlagsThisFrame & EDGE_POST_MLAA_MODE_ENABLED ) )
		{
			m_bEdgePostResultAlreadyInLocalMemory = true; // don't attempt to transfer the results; we don't _really_ do edge post processing, so we consider the results are in memory already
		}
		else
		{
			// EDGE POST TODO: JTS - the previous EdgePost must release it. To avoid overwriting edge post buffer before it finished tranferring back to local memory
			// to release JTS from the future, we can use a separate ring buffer "JTS-RET" sequences and just call into it here.
			// or we can wait for a label and set it from SPU
			// as a simplification, we can just wait for edge post to finish synchronously on ppu
			// we can also use a mutex of sorts and insert JTS here only when edge post is not finished yet
			// we only can start transferring the image after the SPU is done streaming previous frame (if previous frame was post-processed)
			// so wait for SPU to release previous frame, if it was post-processed.
			// Also, if SPU didn't finish post-processing, then we need to synchronize (wait on RSX for SPU to be done)
			// but in many cases SPU will be done by now, so we don't need to spend 900+ns in RSX front-end on CALL+RET
			if( !g_edgePostWorkload.ShouldUseLabelForSynchronization() )
			{
				*pThisJts = CELL_GCM_JUMP( uintp( pThisJts ) + g_spuGcmShared.m_nIoOffsetDelta ); // this will be JTS for SPU to overwrite when post-processing of this frame is done
			}
			// copy the rendered frame to main memory so the SPU MLAA workload can read it
			CPs3gcmLocalMemoryBlockSystemGlobal & surfaceColor = g_ps3gcmGlobalState.m_display.surfaceColor[surfaceFlipIdx];
			GCM_FUNC( cellGcmSetTransferImage, CELL_GCM_TRANSFER_LOCAL_TO_MAIN,
				uintp( g_spuGcm.m_pMlaaBuffer ) + g_ps3gcmGlobalState.m_nIoOffsetDelta, g_ps3gcmGlobalState.m_nSurfaceRenderPitch, 0, 0,
				surfaceColor.Offset(), g_ps3gcmGlobalState.m_nSurfaceRenderPitch, 0, 0,
				g_ps3gcmGlobalState.m_nRenderSize[0], g_ps3gcmGlobalState.m_nRenderSize[1],
				4 );
			// This frame was rendered and transferred to main memory; we'll let interrupt thread know it's ready for Edge Post processing
			m_interruptFifo.Queue( GCM_USERCMD_POSTPROCESS, surfaceFlipIdx );
			m_bEdgePostResultAlreadyInLocalMemory = false;
		}
	}
	else
	{
		// we aren't post-processing this frame, so we need to just prepare flip and flip this framebuffer
		g_spuGcm.ExecuteDeferredDrawQueue( 0 );
		QmsPrepareFlipSubmit( GCM_USERCMD_FLIPREADY, surfaceFlipIdx );
		m_bEdgePostResultAlreadyInLocalMemory = true; // don't attempt to transfer the results; we don't do edge post - processing, so we consider the results are in memory already
	}
	g_spuGcm.FlipDeferredDrawQueue( );
	if( thisPostProcess && !prevPostProcessed )
	{
		// we absolutely MUST reset RSX state before the next frame.
		// QmsPrepareFlipSubmit() does that by definition, but if we don't call it in this Flip (i.e. when !prevPostProcessed && thisPostProcess)
		// we must FORCE RSX state reset
		g_spuGcm.GetDrawQueue()->Push1( SPUDRAWQUEUE_RESETRSXSTATE_METHOD );
	}
#if GCM_ALLOW_TIMESTAMPS
	{
		// The current frame has just finished, insert a timestamp instruction right before flip
		GCM_FUNC( cellGcmSetTimeStamp, surfaceFlipIdx * 2 + GCM_REPORT_TIMESTAMP_FRAME_FIRST + 1 );
		g_ps3_timestampBeginIdx = nSurfaceNextFlipIdx * 2 + GCM_REPORT_TIMESTAMP_FRAME_FIRST;
	}
#endif
	m_interruptFifo.QueueRsxInterrupt();
	g_ps3gcmGlobalState.CmdBufferFlush( CPs3gcmGlobalState::kFlushEndFrame );
	//g_ps3gcmGlobalState.CmdBufferFinish();
	//
	// Make sure that the next framebuffer is free to render into. For that to be so,
	// the flip should happen from the next to the buffer after next. When that happens,
	// the TV shows the buffer after next, and the next buffer is not visible to the user,
	// so it's allowed to render into the next buffer.
	//
	FLIP_LOG( "ev Wait %d", nSurfaceAfterNextFlipIdx );
	m_evFlipReady[ nSurfaceAfterNextFlipIdx ].Wait();
	m_evFlipReady[ nSurfaceAfterNextFlipIdx ].Reset();
	FLIP_LOG( "Draw %d, ev Reset %d", nSurfaceNextFlipIdx, nSurfaceAfterNextFlipIdx );
#if GCM_ALLOW_TIMESTAMPS
	{
		// Since the previous flip completely finished, we can grab its timestamps now
		uint32 uiLastFrameTimestampIdx = ( nSurfaceAfterNextFlipIdx ) * 2 + GCM_REPORT_TIMESTAMP_FRAME_FIRST;
		uint64 uiStartTimestamp = cellGcmGetTimeStamp( uiLastFrameTimestampIdx );
		uint64 uiEndTimestamp = cellGcmGetTimeStamp( uiLastFrameTimestampIdx + 1 );
		uint64 uiRsxTimeInNanoSeconds = uiEndTimestamp - uiStartTimestamp;
		OnFrameTimestampAvailableRsx( uiRsxTimeInNanoSeconds / 1000000.0f );
	}
#endif
}
bool IsRsxReadyForNoninteractiveRefresh( )
{
uint nSurfaceAfterNextFlipIdx = g_ps3gcmGlobalState.m_display.NextSurfaceIndex( 2 );
return g_flipHandler.m_evFlipReady[ nSurfaceAfterNextFlipIdx ].Check();
// if we are 3 vblanks past last flip already, another refresh would be welcome ; if we have no surfaces to flip in this case, we are most likely ready to flip right away
// another thing to check is the interrupt FIFO: if it's not idle, let's just postpone being ready
// return g_flipHandler.m_nVblankCounter > 3 && g_flipHandler.m_nFlipSurfaceCount == 0 && g_flipHandler.m_interruptFifo.IsIdle();
}
void CFlipHandler::TryFlipVblank()
{
	// Called on every vblank. Re-reads the last interrupt marker directly from
	// main memory and pumps events as if a user interrupt had arrived, to
	// artificially simulate an interrupt for cause, because there's suspicion it was dropped.
	//
	// only attempt to generate artificial interrupts if our ready flip queue is empty, otherwise there's no need
	// to tap the narrow 15.6Mb/s bus
	uint nMarker = *g_flipHandler.m_pLastInterruptGet;
	m_nVblankCounter ++;
#if ENABLE_FLIP_LOG
	// Function-static cursor into the flip log ring; repeated vblanks between
	// other log entries are coalesced into the same slot ("Vblanks ..N").
	static int m_nLastFlipLogIdx = 0;
	if( m_nLastFlipLogIdx != g_flipLogIdx )
	{
		// NOTE(review): overwrites the previously-claimed slot with a cumulative
		// vblank count — confirm intended when other threads advanced g_flipLogIdx
		V_snprintf( g_flipLog[m_nLastFlipLogIdx], sizeof( g_flipLog[m_nLastFlipLogIdx] ), "%X.Vblanks ..%d", nMarker, m_nVblankCounter );
	}
	else
	{
		// claim a fresh slot in the power-of-two log ring and start a new entry
		m_nLastFlipLogIdx = cellAtomicIncr32( &g_flipLogIdx ) & ( ARRAYSIZE( g_flipLog ) - 1 );
		V_snprintf( g_flipLog[m_nLastFlipLogIdx], sizeof( g_flipLog[m_nLastFlipLogIdx] ), "%X.Vblank %d", nMarker, m_nVblankCounter );
	}
#endif
	TryPumpEvents( nMarker, 1 ); // isVblank = 1
}
bool CFlipHandler::TryFlipSurface( uint isVblank )
{
	// Attempts to execute one queued flip immediately. Returns false when no
	// surface is queued, or when vsync pacing says it's too early (not enough
	// vblanks elapsed) or too late (missed the vblank by too much). Called from
	// both the vblank and the user-interrupt pump paths.
	if( !m_nFlipSurfaceCount )
	{
		return false; // nothing queued to flip
	}
	if( m_bVSync )
	{
		// honor the configured present frequency, measured in vblanks
		if( m_nVblankCounter < m_nPresentFrequency )
		{
			//FLIP_LOG( "no flip: %d vblanks", m_nVblankCounter, m_nPresentFrequency );
			return false;
		}
		if( !isVblank )
		{
			// Not on the vblank itself: only flip if we're still within a tunable
			// fraction of the measured vsync interval since the last vblank.
			double flVSyncInterval = m_flVBlankTimestamp - m_flVBlankTimestamp0, flMissThreshold = r_ps3_vblank_miss_threshold.GetFloat() * flVSyncInterval;
			double flMiss = Plat_FloatTime() - m_flVBlankTimestamp;
			if ( flMiss > flMissThreshold )
			{
				FLIP_LOG("no flip: %.2fms miss", flMiss * 1000 );
				return false; // wait for another vsync, missed by too much
			}
		}
	}
	// flip the surface immediately
	uint nSystemFlipId = m_nSystemFlipId[ m_nFlipSurfaceIdx ];
	cellGcmSetFlipImmediate( nSystemFlipId );
#ifdef GCM_ALLOW_TIMESTAMPS
	// Collect time since previous flip
	double flFlipImmediateTimestamp = Plat_FloatTime();
	OnFrameTimestampAvailableFlip( ( flFlipImmediateTimestamp - m_flFlipImmediateTimestamp ) * 1000.0f );
	m_flFlipImmediateTimestamp = flFlipImmediateTimestamp;
#endif
	FLIP_LOG( isVblank ? "vFlip%u, ev Set %u" : "_Flip%u, ev Set %u", nSystemFlipId, m_nFlipSurfaceIdx );
	// Release PPU QMS thread waiting for this flip
	m_evFlipReady[m_nFlipSurfaceIdx].Set();
	// advance the flip ring and restart vblank pacing
	m_nFlipSurfaceIdx = ( m_nFlipSurfaceIdx + 1 ) % CPs3gcmDisplay::SURFACE_COUNT;
	m_nFlipSurfaceCount--;
	m_nVblankCounter = 0;
	return true;
}
void CFlipHandler::TryPumpEvents( uint nMarker, uint isVblank )
{
	// Pump pending RSX interrupt events up to nMarker, then attempt a flip — but
	// only when the interrupt-thread mutex can be taken without blocking. If
	// another thread already holds it, it is pumping the same state, so it's
	// safe to skip.
	if ( m_mutexOfInterruptThread.TryLock() )
	{
		PumpEventsUnsafe( nMarker );
		TryFlipSurface( isVblank ); // this will often be a duplicate call
		// Fix: unlock the same member that was locked above. The original went
		// through the g_flipHandler global (same object, since this class is a
		// singleton) — inconsistent and misleading inside a member function.
		m_mutexOfInterruptThread.Unlock();
	}
}
void CFlipHandler::PumpEventsUnsafe( uint nMarker )
{
	// Drain every queued event up to nMarker; a handler may stop the pump early
	// by returning false. Caller must hold m_mutexOfInterruptThread.
	for ( ;; )
	{
		if ( !m_interruptFifo.HasEvents( nMarker ) )
			break;
		RsxInterruptFifo::Event_t event = m_interruptFifo.DequeueEvent();
		if ( !OnRsxInterrupt( event ) )
			break;
	}
}
bool RsxInterruptFifo::IsValidMarker( uint nMarker )
{
	// Unsigned wraparound turns this into a window test: the marker is valid
	// when it lies at most MAX_EVENT_COUNT entries ahead of the get cursor.
	uint nDistanceAhead = nMarker - m_nGet;
	return nDistanceAhead <= MAX_EVENT_COUNT;
}
bool CFlipHandler::OnRsxInterrupt( const RsxInterruptFifo::Event_t event )
{
	// Dispatch a single dequeued RSX user event. Returns true to let the pump
	// continue (currently always true).
	switch( event.m_nCause )
	{
	case GCM_USERCMD_POSTPROCESS:
		{
			// start edge post processing phase here; we can't do the flip yet because we didn't post-process the buffer yet
			// Simulating MLAA job running and adding the cause to the end of the array some time in the nearest (4-5ms) future
			void * pColorSurface = g_ps3gcmGlobalState.m_display.surfaceColor[event.m_nSurfaceFlipIdx].DataInLocalMemory();
			if( true ) // NOTE(review): else-branch is a disabled synchronous-MLAA debug path
			{
				// g_spuGcm.SyncMlaa();
				g_edgePostWorkload.Kick( pColorSurface, event.m_nSurfaceFlipIdx );
			}
			else
			{
				FLIP_LOG( "mlaa sync %d", event.m_nSurfaceFlipIdx );
				g_spuGcm.SyncMlaa( pColorSurface );
				g_spuGcm.m_pEdgePostRsxLock[event.m_nSurfaceFlipIdx] = CELL_GCM_RETURN(); // this will be poked by the SPU job
			}
		}
		break;
	case GCM_USERCMD_FLIPREADY:
		// The surface finished rendering (and post-processing, if any): queue it
		// for flipping. The assert checks the event arrives in ring order.
		FlipAssert( ( m_nFlipSurfaceIdx + m_nFlipSurfaceCount ) % CPs3gcmDisplay::SURFACE_COUNT == event.m_nSurfaceFlipIdx );
		FLIP_LOG( "flip ready %d:sys%d", event.m_nSurfaceFlipIdx, m_nSystemFlipId[event.m_nSurfaceFlipIdx] );
		m_nFlipSurfaceCount++;
		break;
	}
	return true;
}
void CFlipHandler::INTERRUPT_VBlankHandler( const uint32 head )
{
double flVBlankTimestampSave = g_flipHandler.m_flVBlankTimestamp;
g_flipHandler.m_flVBlankTimestamp = Plat_FloatTime();
g_flipHandler.m_flVBlankTimestamp0 = flVBlankTimestampSave;
g_flipHandler.TryFlipVblank( );
}
void CFlipHandler::INTERRUPT_UserHandler( const uint32 nMarker )
{
	// Static callback for GCM user interrupts: pump events for markers that are
	// still inside the valid window; a stale marker means it already happened.
	if( !g_flipHandler.m_interruptFifo.IsValidMarker( nMarker ) )
	{
		// invalid marker: this marker has already happened; skip it
		//FLIP_LOG( "%X.ERROR.UserInterrupt", nMarker );
		DebuggerBreak();
		return;
	}
	//FLIP_LOG( "%X.UserInterrupt", nMarker );
	g_flipHandler.TryPumpEvents( nMarker, 0 );
}
void Ps3gcmFlip_SetFlipPresentFrequency( int nNumVBlanks )
{
	// Clamp the requested present interval (in vblanks) to the supported [1,12]
	// range and store it only when it actually changes. The original nested the
	// change-check twice; a single check after clamping is equivalent, because
	// when the clamped value equals the stored one, the store is a no-op either way.
	nNumVBlanks = MAX( 1, nNumVBlanks );
	nNumVBlanks = MIN( 12, nNumVBlanks );
	if ( g_flipHandler.m_nPresentFrequency != nNumVBlanks )
	{
		g_flipHandler.m_nPresentFrequency = nNumVBlanks;
	}
}
/*
void CFlipHandler::OnState( int nState, int nValue )
{
m_nDebugStates[nState] = nValue;
if( m_nDebugStates[RENDERING_SURFACE] == m_nDebugStates[DISPLAYING_SURFACE] )
DebuggerBreak();
}*/

View File

@@ -0,0 +1,122 @@
//========== Copyright © 2010, Valve Corporation, All rights reserved. ========
#ifndef MATERIALSYSTEM_PS3GCM_RSXFLIP_HDR
#define MATERIALSYSTEM_PS3GCM_RSXFLIP_HDR
#ifndef _CERT
#define GCM_ALLOW_TIMESTAMPS 1
void OnFrameTimestampAvailableFlip( float ms );
void OnFrameTimestampAvailableRsx( float ms );
void OnFrameTimestampAvailableMain( float ms );
void OnFrameTimestampAvailableMST( float ms );
extern int32 g_ps3_timestampBeginIdx;
#endif
#include "ps3/ps3gcmmemory.h"
// FIFO ring of user-interrupt events, filled by the render thread and drained
// from the RSX interrupt callback. Markers are free-running put positions;
// see IsValidMarker for the wraparound window test.
class RsxInterruptFifo
{
public:
	struct Event_t
	{
		uint8 m_nCause;          // GCM_USERCMD_* event code
		uint8 m_nSurfaceFlipIdx; // display surface this event refers to
	};
protected:
	enum { MAX_EVENT_COUNT = 0x80 }; // ring capacity
	volatile uint m_nGet; // consumer cursor (advanced in interrupt context)
	uint m_nPut;          // producer cursor
	Event_t m_queue[MAX_EVENT_COUNT];
public:
	void Init();
	uint Queue( uint8 nCause, uint8 nSurfaceFlipIdx );
	uint Queue( const Event_t &event );
	uint GetPutMarker()const;
	int HasEvents( uint nMarker );
	bool IsIdle()const { return m_nPut == m_nGet;}
	bool IsValidMarker( uint nMarker );
	Event_t & PeekEvent();
	const Event_t DequeueEvent( );
	void QueueRsxInterrupt();
};
// Owns the display flip state machine: queues flips from the render thread,
// executes them from the RSX vblank/user interrupt handlers, and coordinates
// MLAA (edge post) processing with the flip pipeline.
class CFlipHandler
{
public:
	void Init();
	void Shutdown();
	void Flip();
	void BeginScene();
	void EndScene(){}
	// interrupt-thread event dispatch; returns true to continue pumping
	bool OnRsxInterrupt( const RsxInterruptFifo::Event_t event );
	void TryFlipVblank();
	void TryPumpEvents( uint nMarker, uint isVblank );
	void QmsPrepareFlipSubmit( GcmUserCommandEnum_t nEvent, uint surfaceFlipIdx );
	bool QmsAdviceBeforeDrawPrevFramebuffer();
	// MLAA enable/disable controls (the "Permannetly" spelling is part of the public interface)
	void DisableMlaa(){ m_nMlaaFlagsThisFrame = 0; }
	void DisableMlaaPermannetly(){ m_nMlaaFlagMaskNextFrame = 0; }
	void EnableMlaaPermannetly(){ m_nMlaaFlagMaskNextFrame = ~0u; }
	//void DisableMlaaForTwoFrames(){ m_nMlaaFlagsThisFrame = m_nMlaaFlagMaskNextFrame = 0; }
	int IsMlaaEnabled()const { return m_nMlaaFlagsThisFrame; }
	enum DebugStateEnum_t
	{
		RENDERING_SURFACE,
		DISPLAYING_SURFACE,
		DEBUG_STATE_COUNT
	};
	//void OnState( int nState, int nValue );
public:
	static void INTERRUPT_VBlankHandler( const uint32 head );
	static void INTERRUPT_UserHandler( const uint32 cause );
	void PumpEventsUnsafe( uint nMarker ); // caller must hold m_mutexOfInterruptThread
	bool TryFlipSurface( uint isVblank );
protected:
	void TransferMlaaResultIfNecessary( uint nSurfacePrevFlipIdx );
public:
	//int m_nDebugStates[DEBUG_STATE_COUNT];
	// How often to present in terms of vblanks?
	// (@60Hz scanout TV: 1 = 60 Hz = every vblank, 2 = 30 Hz = every other vblank, 3 = 20 Hz = every 3rd vblank)
	// (@50Hz PAL TV: 1 = 50 Hz = every vblank, 2 = 25 Hz = every other vblank, 3 = 17 Hz = every 3rd vblank)
	int m_nPresentFrequency;
	// Interrupt-driven data
#ifdef GCM_ALLOW_TIMESTAMPS
	double m_flFlipImmediateTimestamp; // Plat_FloatTime() of the last immediate flip
#endif
	// current and previous vblank timestamps; their difference estimates the vsync interval
	double m_flVBlankTimestamp, m_flVBlankTimestamp0;
	// Mutex to sync with interrupt thread
	CThreadMutex m_mutexOfInterruptThread;
	CThreadManualEvent m_evFlipReady[ CPs3gcmDisplay::SURFACE_COUNT ];
	uint m_nFlipSurfaceIdx, m_nFlipSurfaceCount; // the next surface to flip, count of surfaces to flip
	uint m_nSystemFlipId[ CPs3gcmDisplay::SURFACE_COUNT ];
	//uint m_nLastFlippedSurfaceIdx; // used to check for duplicate TryFlip callbacks
	uint m_nVblankCounter; // vblanks since the last flip; reset by TryFlipSurface
	uint32 * m_pLastInterruptGet; // main-memory copy of the last interrupt marker
	RsxInterruptFifo m_interruptFifo;
	uint8 m_surfaceEdgePost[CPs3gcmDisplay::SURFACE_COUNT]; // true when the corresponding surface must be post-processed
	// VSync enabled?
	// true = Syncronize with VSync = true
	// false = Syncronize with every HSync scanline
	bool m_bVSync;
	bool m_bEdgePostResultAlreadyInLocalMemory;
	int m_nMlaaFlagsThisFrame;
	int m_nMlaaFlagMaskNextFrame;
};
extern CFlipHandler g_flipHandler;
#endif

View File

@@ -0,0 +1,4 @@
//================ Copyright (c) 1996-2009 Valve Corporation. All Rights Reserved. =================
#include "dxabstract.h"
#include "common/ps3/rsx_spu_double_ring.cpp"

View File

@@ -0,0 +1 @@
sce-cgc --mnvb -p sce_fp_rsx -o shader_ps_empty.bin shader_ps_empty.cg

Binary file not shown.

View File

@@ -0,0 +1,7 @@
// Trivial fragment shader: writes constant opaque magenta. Used as the
// "empty" placeholder pixel shader (compiled by the sce-cgc command above).
void main
(
	out float4 c : COLOR
)
{
	c.rgba = float4( 1, 0, 1, 1 );
}

View File

@@ -0,0 +1,11 @@
0x00, 0x00, 0x1B, 0x5C, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00, 0x01
, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x80
, 0x00, 0x00, 0x04, 0x18, 0x00, 0x00, 0x0A, 0xC5, 0x00, 0x00, 0x10, 0x05, 0xFF, 0xFF, 0xFF, 0xFF
, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50
, 0x00, 0x00, 0x10, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00
, 0x43, 0x4F, 0x4C, 0x4F, 0x52, 0x00, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF
, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
, 0x1E, 0x7E, 0x7E, 0x00, 0xC8, 0x00, 0x1C, 0x9D, 0xC8, 0x00, 0x00, 0x01, 0xC8, 0x00, 0x00, 0x01
, 0x1E, 0x01, 0x01, 0x00, 0x28, 0x02, 0x1C, 0x9C, 0xC8, 0x00, 0x00, 0x01, 0xC8, 0x00, 0x00, 0x01
, 0x00, 0x00, 0x3F, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00

View File

@@ -0,0 +1,210 @@
//========== Copyright © Valve Corporation, All rights reserved. ========
#include "tier0/memalloc.h"
#include "ps3/ps3_gcm_config.h"
#include "spudrawqueue.h"
#include "ps3gcmstate.h"
void SpuDrawQueue::Init( uint nBufferSize, uint32 * pSignal, FnFlushCallback_t fnFlushCallback, FnStallCallback_t fnStallCallback )
{
	// Allocates the main-memory command ring and wires up the SPU-side signal
	// word plus the flush/stall callbacks. The buffer must hold at least two
	// LS-ring-sized windows so the flush watermark stays ahead of the put cursor.
	if( nBufferSize < 2 * DRAWQUEUE_LSRING_SIZE )
	{
		Warning("SpuDrawQueue requested size (%d bytes) is too small (must be at least %d), auto-adjusting\n", nBufferSize, 2 * DRAWQUEUE_LSRING_SIZE );
		nBufferSize = 2 * DRAWQUEUE_LSRING_SIZE;
	}
	m_pBuffer = ( uint32* ) g_ps3gcmGlobalState.IoSlackAlloc( 128, nBufferSize );
	// round the usable size down to a 16-byte (DMA) boundary
	m_pBufferEnd = AddBytes( m_pBuffer, nBufferSize & -16 );
	m_pPut = m_pGet = m_pBuffer; // put == get means "ring empty"
	*pSignal = GetSignal();
	m_pSignal = pSignal;
	m_fnFlushCallback = fnFlushCallback;
	m_fnStallCallback = fnStallCallback;
	m_fnFlushCallbackStack = NULL;
#ifdef _DEBUG
	m_nAllocBreakAddress = NULL;
	m_nAllocCount = m_nCollectCount = 0;
	m_nAllocBreak = m_nCollectBreak = 0;
#endif
	m_nAllocWords = 0;
	// first flush-advice point: one LS ring past the start of the buffer
	m_pFlushWatermark = AddBytes( m_pBuffer, DRAWQUEUE_LSRING_SIZE );
	if( m_pFlushWatermark + 8 >= m_pBufferEnd )
	{
		// NOTE(review): requires >= 8 words of headroom between the watermark and
		// the buffer end — presumably to guarantee AllocWords can always make
		// progress; confirm against the wrap logic in AllocWords
		Error( "SpuDrawQueue misconfiguration: allocated buffer of %d bytes, but LS watermark size is %d bytes. Increase the main memory buffer size to avoid PPU deadlocks\n", nBufferSize, DRAWQUEUE_LSRING_SIZE );
	}
}
void SpuDrawQueue::PushFlushCallback( FnFlushCallback_t fnNewCallback )
{
	// One-deep callback stack: save the current flush callback, install the new
	// one. Asserts the single save slot is free.
	Assert( !m_fnFlushCallbackStack );
	FnFlushCallback_t fnPrevious = m_fnFlushCallback;
	m_fnFlushCallbackStack = fnPrevious;
	m_fnFlushCallback = fnNewCallback;
}
void SpuDrawQueue::PopFlushCallback()
{
Assert( m_fnFlushCallbackStack );
m_fnFlushCallback = m_fnFlushCallbackStack;
m_fnFlushCallbackStack = NULL;
}
void SpuDrawQueue::Shutdown()
{
	// Return the command ring to the IO-slack allocator it came from (see Init).
	g_ps3gcmGlobalState.IoSlackFree( m_pBuffer );
}
void SpuDrawQueue::UnallocToAlign()
{
	// Round the put cursor DOWN to a 16-byte boundary, discarding any words
	// written in the current partial 16-byte block (-16 acts as mask ~0xF).
	m_pPut = ( uint32* )( uintp( m_pPut ) & -16 );
}
//////////////////////////////////////////////////////////////////////////
// REENTRANT: m_fnFlushCallback can in turn call AllocWords with a small number of words
//
// Allocates nWords contiguous 32-bit words in the ring and returns a pointer
// to them. When the tail of the buffer is too small, it wraps to the start,
// stamping the skipped tail with a NOP-count method word so the SPU consumer
// skips it. Crossing the flush watermark triggers the flush callback (which
// may itself move m_pPut, hence the recompute). Finally spins — invoking the
// stall callback after a few spins — until the SPU get cursor frees room.
uint32 *SpuDrawQueue::AllocWords( uint nWords /*, uint nAlignMask, uint nAlignValue*/ )
{
#ifdef _DEBUG
	uint32 * pSavePut = m_pPut, *pSaveGet = m_pGet;(void)(pSavePut, pSaveGet);
	m_nAllocCount++;
	if( m_nAllocCount == m_nAllocBreak )
		DebuggerBreak();
#endif
	Assert( nWords * sizeof( uint32 ) <= SPUDRAWQUEUE_NOPCOUNT_MASK );
	uint32 * pOldPut = m_pPut, * pAllocation = pOldPut;//( uint32* )( uintp( pOldPut ) + ( ( nAlignValue - uintp( pOldPut ) ) & nAlignMask ) );
	uint32 * pNewPut = pAllocation + nWords;
	bool bWrap = false;
	if( pNewPut > m_pBufferEnd ) // do we need to wrap?
	{
		//we have to wrap...
		if( m_pPut < m_pBufferEnd )
			*m_pPut = SPUDRAWQUEUE_NOPCOUNT_METHOD | ( m_pBufferEnd - m_pPut - 1 );
		pNewPut = m_pBuffer + nWords;
		bWrap = true;
		pAllocation = m_pBuffer;
	}
	// since this put may be the last, we need to make sure that even after alignment, put != get
	// so we wait for the space to free up for aligned put
	uint32 * pNewAlignedPut = ( uint32* )AlignValue( uintp( pNewPut ), DMA_ALIGNMENT );
	// did this allocation cross the flush watermark? (wrapping turns the AND into an OR)
	if( bWrap ? pOldPut <= m_pFlushWatermark || m_pFlushWatermark < pNewAlignedPut:
		pOldPut <= m_pFlushWatermark && m_pFlushWatermark < pNewAlignedPut )
	{
		// collects , aligns and submits commands to SPU
		m_fnFlushCallback( this );
		// m_pPut may have changed slightly for alignment or EndZPass(), so we need to reconsider wrapping and recompute all pointers
		pOldPut = m_pPut; pAllocation = pOldPut;
		pNewPut = pOldPut + nWords;
		bWrap = false;
		if( pNewPut > m_pBufferEnd ) // do we need to wrap?
		{
			//we have to wrap...
			if( m_pPut < m_pBufferEnd )
				*m_pPut = SPUDRAWQUEUE_NOPCOUNT_METHOD | ( m_pBufferEnd - m_pPut - 1 );
			pNewPut = m_pBuffer + nWords;
			bWrap = true;
			pAllocation = m_pBuffer;
		}
		// since this put may be the last, we need to make sure that even after alignment, put != get
		// so we wait for the space to free up for aligned put
		pNewAlignedPut = ( uint32* )AlignValue( uintp( pNewPut ), DMA_ALIGNMENT );
	}
	// we must not allow new put == get, because it will cause the whole ring to suddenly be marked as empty
	uint nSpins = 0;
	while( bWrap ? pOldPut < m_pGet || m_pGet <= pNewAlignedPut : pOldPut < m_pGet && m_pGet <= pNewAlignedPut )
	{
		if( nSpins++ > 2 )
		{
			m_fnStallCallback( this, m_pGet, nWords );
		}
		// refresh the get cursor from the signal word the SPU writes back
		SetSignal( *m_pSignal );
	}
	Assert( pNewPut >= m_pBuffer && pNewPut <= m_pBufferEnd );
	Assert( pAllocation >= m_pBuffer && pAllocation <= m_pBufferEnd );
	Assert( pAllocation + nWords >= m_pBuffer && pAllocation + nWords <= m_pBufferEnd );
	m_pPut = pNewPut; // we don't need to use up the whole aligned buffer
#ifdef _DEBUG
	if( pAllocation == m_nAllocBreakAddress )
		DebuggerBreak();
#endif
	m_nAllocWords += nWords;
	return pAllocation;
}
// This is called within the Flush callback. May change m_pPut
// returns the number of bytes written from UNaligned start to UNaligned end
uint SpuDrawQueue::Collect( uint32 * pStartBatch, uint32 * pEndBatch, CDmaListConstructor & dmac )
{
#ifdef _DEBUG
	CDmaListConstructor saveDmac = dmac;(void)saveDmac;
	m_nCollectCount++;
	Assert( m_nCollectCount != m_nCollectBreak );
#endif
	Assert( pStartBatch >= m_pBuffer && pStartBatch <= m_pBufferEnd && pEndBatch >= m_pBuffer && pEndBatch <= m_pBufferEnd );
	uint nSize = 0;
	if( pEndBatch != pStartBatch ) // or else it's an empty transaction, nothing to upload
	{
		// align the put pointer for DMA - always safe because SPUs can't be processing the remainder of 16-byte block
		// while we're writing into its beginning.
		// while( uintp( pEndBatch ) & ( DMA_ALIGNMENT - 1 ) )
		// {
		// *( pEndBatch++ ) = 0;
		// }
		if( pEndBatch > pStartBatch )
		{
			// contiguous region (no ring wraparound): one DMA input region
			// (the original comment said "it wraps" here, which was backwards)
			dmac.AddInputDmaLargeUnalignedRegion( pStartBatch, pEndBatch );
			nSize += uintp( pEndBatch ) - uintp( pStartBatch );
		}
		else
		{
			// the batch wraps around the ring: upload the tail of the buffer,
			// then the head up to pEndBatch
			if( pStartBatch != m_pBufferEnd )
			{
				dmac.AddInputDmaLargeUnalignedRegion( pStartBatch, m_pBufferEnd );
				nSize += uintp( m_pBufferEnd ) - uintp( pStartBatch );
			}
			dmac.AddInputDmaLargeUnalignedRegion( m_pBuffer, pEndBatch );
			nSize += uintp( pEndBatch ) - uintp( m_pBuffer );
		}
	}
	SetFlushWatermarkFrom( pEndBatch );
	return nSize;
}
void SpuDrawQueue::SetFlushWatermarkFrom( uint32 *pPut )
{
	// Place the next flush-advice point one LS-ring size ahead of pPut, rounded
	// down to 16 bytes, and wrap it back into the buffer if it lands past the end.
	m_pFlushWatermark = ( uint32* )( ( uintp( pPut ) + DRAWQUEUE_LSRING_SIZE ) & -16 );
	while( m_pFlushWatermark >= m_pBufferEnd )
	{
		m_pFlushWatermark -= m_pBufferEnd - m_pBuffer;
	}
}
uint SpuDrawQueue::Length( uint32 * pBegin, uint32 * pEnd )const
{
	// Byte distance from pBegin forward to pEnd along the ring, accounting for
	// wraparound past the end of the buffer.
	Assert( IsValidCursor( pBegin ) && IsValidCursor( pEnd ) );
	if( pBegin >= pEnd )
	{
		// wrapped: tail of the buffer plus the head up to pEnd
		uint nTailBytes = uintp( m_pBufferEnd ) - uintp( pBegin );
		uint nHeadBytes = uintp( pEnd ) - uintp( m_pBuffer );
		return nTailBytes + nHeadBytes;
	}
	return uintp( pEnd ) - uintp( pBegin );
}

View File

@@ -0,0 +1,132 @@
//========== Copyright © Valve Corporation, All rights reserved. ========
//
// This is PPU->SPU fifo queue to feed draw jobs
//
#ifndef SPUDRAWQUEUE_HDR
#define SPUDRAWQUEUE_HDR
#include "tier0/dbg.h"
#include "tier1/strtools.h"
#include "vjobs/pcring.h"
#include "ps3/vjobutils_shared.h"
#include "vjobs/spudrawqueue_shared.h"
extern void StallAndWarning( const char * pWarning );
// PPU-side ring buffer of draw/state command words consumed by SPU jobs. The
// PPU appends words at the put cursor; the SPU reports consumption through an
// external signal word which becomes the new get cursor (see SetSignal).
class SpuDrawQueue
{
public:
	typedef void ( *FnFlushCallback_t)( SpuDrawQueue * );
	typedef void ( *FnStallCallback_t)( SpuDrawQueue *, uint32 * pGet, uint nWords );
	void Init( uint nBufferSize, uint32 * pSignal, FnFlushCallback_t fnFlushCallback, FnStallCallback_t fnStallCallback );
	void Shutdown();
	// one-deep flush-callback stack
	void PushFlushCallback( FnFlushCallback_t fnFlushCallback );
	void PopFlushCallback();
	uint32 *AllocWords( uint nWords /*, uint nAlignMask = 0, uint nAlignValue = 0*/ );
	void UnallocToAlign();
	// allocate a 16-byte-aligned structure of words (T size must be a multiple of 4)
	template<typename T>
	T *AllocAligned( )
	{
		COMPILE_TIME_ASSERT( sizeof( T ) % 4 == 0 );
		Align();
		return ( T* )AllocWords( sizeof( T ) / 4 );
	}
	// allocate a structure preceded by one header word; returns the payload pointer
	template <typename T>
	T *AllocWithHeader( uint nHeader ) { uint32 * pHeader = AllocWords( 1 + sizeof( T ) / 4 ); *pHeader = nHeader; return ( T* )( pHeader + 1 ); }
	uint Collect( uint32 * pStartBatch, uint32 * pEndBatch, CDmaListConstructor & dmac );
	uint32 * GetCursor(){ return m_pPut; }
	uint32 * GetFlushWatermark() {return m_pFlushWatermark;}
	void Align();
	void Push4( uint32 a, uint32 b, uint32 c, uint32 d ){ uint32 * p = AllocWords( 4 ); p[0] = a; p[1] = b; p[2] = c; p[3] = d; }
	void Push3( uint32 a, uint32 b, uint32 c ){ uint32 * p = AllocWords( 3 ); p[0] = a; p[1] = b; p[2] = c; }
	void Push2( uint32 a, uint32 b ){ uint32 * p = AllocWords( 2 ); p[0] = a; p[1] = b; }
	void Push1( uint32 a ){ uint32 * p = AllocWords( 1 ); p[0] = a; }
	enum ConstEnum_t {DMA_ALIGNMENT = 16 };
	void SetFlushWatermarkFrom( uint32 *pPut );
	// the signal is the raw 32-bit address of the put cursor
	uint32 GetSignal()const{ return ( uint32 )m_pPut; }
	uint32 * GetBuffer()const{ return m_pBuffer; }
	uint32 * GetBufferEnd()const { return m_pBufferEnd; }
	uint32 GetBufferWords()const { return m_pBufferEnd - m_pBuffer; }
	bool IsValidCursor( uint32 * p )const { return m_pBuffer <= p && p <= m_pBufferEnd && 0 == ( uintp( p ) & 3 ); }
	uint32 * NormalizeCursor( uint32 * p ) { Assert( IsValidCursor( p ) ); return ( p >= m_pBufferEnd ? m_pBuffer : p ); }
	uint Length( uint32 * pBegin, uint32 * pEnd )const;
protected:
	void SetSignal( uint32 nSignal );
public:
	uint64 m_nAllocWords; // total words allocated over the queue's lifetime
#ifdef _DEBUG
	uint64 m_nAllocCount, m_nCollectCount;
	uint64 m_nAllocBreak, m_nCollectBreak;
	uint32 * m_nAllocBreakAddress;
#endif
protected:
	// the begin and end of the whole buffer
	// it must be 16-byte aligned
	uint32 *m_pBuffer, *m_pBufferEnd;
	// up to this point, we may write stuff. Starting at this point, SPU is reading data
	// m_pPut==m_pGet means "buffer empty"
	// m_pPut > m_pGet means we can write to the end of the buffer and then start at the start
	// m_pPut < m_pGet means we can write from put to get, exclusively
	uint32 *m_pGet;
	// this is the point where we can write stuff, up to m_pGet
	uint32 *m_pPut;
	// external signal in the structure where SPU writes
	volatile uint32 * m_pSignal;
	uint32 *m_pFlushWatermark;
	// FlushCallback member is implemented elsewhere. DrawQueue calls this callback
	// as an advice to flush the queue. The callback doesn't have to flush the queue
	// if the current transaction is deemed atomic. Also, even if the queue is flushed,
	// this object does not get immediate feedback until it reads the signal that SPU sets
	// much later, asynchronously. This callback is important to slice the long transactions
	// into smaller chunks that fit into LS
	FnFlushCallback_t m_fnFlushCallback;
	FnStallCallback_t m_fnStallCallback;
	//enum EnumConst_t{STACK_SIZE = 1 };
	FnFlushCallback_t m_fnFlushCallbackStack;
};
inline void SpuDrawQueue::SetSignal( uint32 nSignal )
{
	// The signal is the SPU-written copy of its get cursor, passed as a raw
	// 32-bit address; adopt it as the new get after sanity-checking ordering.
	uint32 *pNewGet = (uint32*)nSignal;
	// the new get must be between old get and put
	Assert( pNewGet == m_pGet ||
		( pNewGet > m_pGet ? m_pPut < m_pGet || pNewGet <= m_pPut // the new get doesn't wrap around the buffer,
		: m_pPut < m_pGet && pNewGet <= m_pPut // the new get wraps around the buffer, so the put must wrap around, too
		)
		);
	m_pGet = pNewGet;
}
inline void SpuDrawQueue::Align()
{
	// Pad with zero words until the put cursor reaches a 16-byte boundary.
	for( ; uintp( m_pPut ) & 0xF ; )
	{
		Push1( 0 );
	}
}
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,257 @@
//========== Copyright © Valve Corporation, All rights reserved. ========
// This is the central hub for controlling SPU activities relating to
// RSX/graphics processing/rendering
//
#ifndef SPU_GCM_HDR
#define SPU_GCM_HDR
#include "ps3/spugcm_shared.h"
//#include "ps3/rsx_spu_double_ring.h"
#include "vjobs_interface.h"
#include "ps3/vjobchain.h"
#include "ps3/vjobpool.h"
#include "ps3/ps3gcmmemory.h"
#include "spudrawqueue.h"
#include "gcmfunc.h"
#include <edge/post/edgePost_ppu.h>
#include <edge/post/edgepost_mlaa_handler_ppu.h>
extern CSpuGcmSharedState g_spuGcmShared;
extern void StallAndWarning( const char * pWarning );
// Records rendering jobs issued during a Z-prepass so they can be replayed
// afterwards. Jobs live in the m_pJobs ring; m_nPut/m_nGet are free-running
// indices masked on access (so m_nJobs must be a power of two).
class ZPass
{
public:
	void Init();
	bool CanBegin();
	void Begin( uint32 * pCursor );
	void End() { m_pCursor = NULL; }
	void Shutdown();
	void Validate()const{Assert( !m_nDummy && ( m_nPut - m_nGet ) <= m_nJobs );}
	uint GetSubchainCapacity()const { Validate(); return m_nJobs - ( m_nPut - m_nGet ) ; }
	uint64 * GetCurrentCommandPtr() { return &m_pJobs[ m_nPut & ( m_nJobs - 1 ) ]; }
	void PushCommand( uint64 nCommand );
	operator bool () const { return m_pCursor != NULL; } // true while a ZPass is open
public:
	uint m_nDrawPassSubchain;
	uint m_nJobPoolMarker;
	uint m_nJobs; // ring capacity; masked in GetCurrentCommandPtr, so must be a power of two
	uint m_nDummy;
	uint m_nPut;
	uint m_isInEndZPass;
	ZPassSavedState_t * m_pSavedState;
	uint32 * m_pCursor; // non-NULL while recording (see operator bool / Begin / End)
	uint64 * m_pSubchain;
	uint64 * m_pJobs; // this ring buffer contains recorded rendering jobs to be replayed
	uint m_nFpcpStateEndOfJournalIdxAtZPassBegin; // ... at the beginning of Zpass
	// Notice: this m_nGet member is patched by SPU after a corresponding job subchain is finished
	volatile uint32 m_nGet;
protected:
};
// Wraps the Edge Post (MLAA) SPU workload: a single processing stage plus the
// MLAA context and scratch memory. 128-byte aligned for SPU DMA.
class ALIGN128 CEdgePostWorkload
{
public:
	CEdgePostWorkload(){m_isInitialized = false;}
	void OnVjobsInit( VJobsRoot* pRoot );
	void OnVjobsShutdown( VJobsRoot* pRoot );
	void Kick( void * dst, uint nSetLabel );
	// true: RSX waits on a label for post-process completion (see Flip path)
	bool ShouldUseLabelForSynchronization()const{return true;}
	bool IsResultInMainMemory()const { return true; }
	enum EnumConst_t{STAGE_COUNT=1};
	EdgePostProcessStage m_stages[STAGE_COUNT];
	EdgePostMlaaContext m_mlaaContext;
	EdgePostWorkload m_workload;
	void * m_pMlaaScratch;
	bool m_isInitialized;
} ALIGN128_POST;
extern CEdgePostWorkload g_edgePostWorkload;
// Central hub for controlling SPU activities relating to RSX/graphics
// rendering: owns the PPU->SPU draw queues (normal + deferred), the PCB ring,
// the SPURS job sink/pool, and ZPass recording state.
class CSpuGcm: public VJobInstance
{
public:
	void CreateRsxBuffers();
	void CreateIoBuffers();
	void UseIoBufferSlack( uint nIoBufferSlack );
	void OnGcmInit();
	void Shutdown();
	void BeginScene();
	void EndScene();
	void CmdBufferFlush( )
	{
		GcmStateFlush();
		//PutPcbringCtx();
	}
	void CmdBufferFinish();
	int OnGcmCommandBufferReserveCallback( struct CellGcmContextData *context, uint32_t nCount );
	int OnGcmCommandBufferReserveCallbackOld( struct CellGcmContextData *context, uint32_t nCount );
	void GcmStateFlush( );
	SpuDrawHeader_t * BeginDrawBatch();
	void SubmitDrawBatch( IDirect3DVertexDeclaration9 *pVertDecl, OptimizedModel::OptimizedIndexBufferMarkupPs3_t *pIbMarkup );
	bool TruePause();
	void RenderEmptyFrame();
	void SyncMlaa( void * pLocalSurface );
	void SyncMlaa( ) { SyncMlaa( m_pMlaaBuffer ); }
	bool BeginZPass( );
	void SetPredication( uint nPredicationMask ); // D3DPRED_* mask
	void EndZPass( bool bPopMarker );
	void AbortZPass(){ EndZPass( false ); }
	void OnSetPixelShaderConstant();
	// selector 0 = normal (immediate) queue, 1 = deferred queue
	SpuDrawQueue * GetDrawQueue(){ return &m_spuDrawQueues[m_nSpuDrawQueueSelector];}
	SpuDrawQueue * GetDrawQueueNormal(){ return &m_spuDrawQueues[0]; }
	void DrawQueueNormal( bool bExecuteDeferredQueueSegment = true );
	struct DrawQueueDeferred_Result{ bool isFirstInFrame; };
	DrawQueueDeferred_Result DrawQueueDeferred(); // may flush previous frame deferred queue
	uint IsDeferredDrawQueue() { return m_nSpuDrawQueueSelector; }
	bool ExecuteDeferredDrawQueue( uint nPrevious );
	void FlipDeferredDrawQueue();
	bool ExecuteDeferredDrawQueueSegment( uint32 * pCmdBegin, uint32 * pCmdEnd, bool bExecuteDraws );
	void ValidateDeferredQueue();
	//void DisableMlaaForTwoFrames();
	void DisableMlaaPermanently();
	void DisableMlaa();
protected:
	static void OnSpuDrawQueueStallDeferredDelegator( SpuDrawQueue *pDrawQueue, uint32 * pGet, uint nWords );
	void OnSpuDrawQueueStallDeferred( SpuDrawQueue *pDrawQueue, uint32 * pGet, uint nWords );
	static void OnSpuDrawQueueFlushDeferred( SpuDrawQueue *pDrawQueue );
	static void OnSpuDrawQueueStall( SpuDrawQueue *pDrawQueue, uint32 * pGet, uint nWords );
	static void OnSpuDrawQueueFlush( SpuDrawQueue *pDrawQueue );
	static void OnSpuDrawQueueFlushDoNothing( SpuDrawQueue *pDrawQueue ){}
	static void OnSpuDrawQueueFlushInZPass( SpuDrawQueue *pDrawQueue );
	void OnSpuDrawQueueFlushInZPass( );
	void OnVjobsInit(); // gets called after m_pRoot was created and assigned
	void TestPriorities();
	void OnVjobsShutdown(); // gets called before m_pRoot is about to be destructed and NULL'ed
	uint32 * GetPcbringPtr( uint nOffsetBytes ) { return AddBytes( m_pPcbringBuffer, nOffsetBytes & ( g_spuGcmShared.m_nPcbringSize - 1 ) ); }
	uint32 * GetPcbringBufferEnd() {return AddBytes( m_pPcbringBuffer, g_spuGcmShared.m_nPcbringSize ); }
	signed int GetPcbringAvailableBytes()const;
	//void SetCtxBuffer( uint nSegment );
#if 0
	volatile uint64* PutPcbringCtx( uint32 * pSkipTo, uint32 * pNewEnd );
	volatile uint64* PutPcbringCtx();
#endif
	inline uint GetMaxPcbringSegmentBytes()const { return m_nMaxPcbringSegmentBytes; }
	void BeginGcmStateTransaction();
	void PushSpuGcmJob( CellSpursJob128 * pJob );
	void PushStateFlushJob( SpuDrawQueue * pDrawQueue, uint nResultantSpuDrawQueueSignal, uint32 *pCursorBegin, uint32 * pCursorEnd );
	void PushSpuGcmJobCommand( uint64 nCommand );
	void PushSpuGcmCallSubchain( uint64 * eaJobChain ){ m_jobSink.Push( CELL_SPURS_JOB_COMMAND_CALL( eaJobChain ) );}
	void ZPassCheckpoint( uint nReserveSlots );
	CellSpursJob128 * PushDrawBatchJob( uint nResultantSpuDrawQueueSignal, SpuDrawHeader_t * pDrawHeader, IDirect3DVertexDeclaration9 *pVertDecl, OptimizedModel::OptimizedIndexBufferMarkupPs3_t *pIbMarkup );
public:
	void CloseDeferredChunk();
	uint32* OpenDeferredChunk( uint nHeader = SPUDRAWQUEUE_DEFERRED_GCMFLUSH_METHOD, uint nAllocExtra = 0 );
	// cursor tracking is per-queue (normal vs deferred), indexed by the selector
	void SetCurrentBatchCursor( uint32 * pCursor )
	{
		m_pCurrentBatchCursor[m_nSpuDrawQueueSelector] = pCursor;
	}
	uint32 * GetCurrentBatchCursor()
	{
		return m_pCurrentBatchCursor[m_nSpuDrawQueueSelector];
	}
protected:
	SpuDrawQueue m_spuDrawQueues[2];
	// this frame [0] and previous frames [1] "end" markers for replay
	// gets updated on every chunk close
	uint32* m_pDeferredQueueCursors[3];
	// this is the last point where DrawQueueDeferred() was called
	uint32 * m_pDeferredQueueSegment;
	// pointer to deferred chunk last open; NULL if the last deferred chunk was closed, but none new was open yet
	// this may stay non-NULL( thus indicating non-closed chunk) during executing deferred commands, too,
	// in case of out-of-memory condition. Then, StallDeferred callback will execute deferred commands without closing current chunk.
	// Relation: MANY chunks per ONE batch
	uint32* m_pDeferredChunkHead;
	uint32 m_nDeferredChunkHead;
	uint32 *m_pDeferredChunkSubmittedTill[4]; // only [1] is used; [0] and [2] are write- and debug-only
	uint16 m_nSpuDrawQueueSelector;
	uint16 m_nFramesToDisableDeferredQueue; // disable for this number of frames if we don't have enough memory
public:
	// fragment program constant patcher double ring, JTS->RET , RSX->SPU
	CPs3gcmLocalMemoryBlock m_fpcpRingBuffer, m_edgeGeomRingBuffer;
	VjobChain3 m_jobSink;
	VjobPool<CellSpursJob128> m_jobPool128;
	volatile uint32 * m_pFinishLabel;
	uint32 *m_pPcbringBuffer;
	ZPass m_zPass; // NULL when we aren't in Zpass
	DeferredState_t * m_pDeferredStates[2];
	uint m_nPcbringBegin; // this byte offset corresponds to GCM_CTX->begin
	uint32 m_nPcbringWaitSpins;
	uint32 m_nMaxPcbringSegmentBytes;
	uint32 m_nGcmFlushJobScratchSize;
	uintp m_eaLastJobThatUpdatesSharedState;
	uint m_nFpcpStateEndOfJournalIdxAtSpuGcmJob;
	enum TransactionBatchEnum_t
	{
		BATCH_GCMSTATE, // the default transaction type
		BATCH_DRAW
	};
	TransactionBatchEnum_t m_nCurrentBatch;
	// the batch is a batch of commands to send to an SPU job: job_gcmflush (BATCH_GCMSTATE) or job_drawindexedprimitive (BATCH_DRAW)
	uint32 * m_pCurrentBatchCursor[2];
	void * m_pMlaaBuffer, *m_pMlaaBufferOut;
	volatile vec_uint4 * m_pMlaaBufferCookie;
	uint32 *m_pEdgePostRsxLock;
	uint m_nFrame;
#ifdef _DEBUG
	uint m_nJobsPushed, m_nChunksClosedInSegment;
#endif
	uint64 m_nDeferredQueueWords;
	bool m_bUseDeferredDrawQueue;
};
extern CSpuGcm g_spuGcm;
extern const vec_uint4 g_vuSpuGcmCookie;
// Scratch SPURS job plus notify area used by CSpuGcm::TestPriorities to probe
// job-chain priority behavior. 128-byte aligned for SPU DMA.
struct ALIGN128 PriorityTest_t
{
	CellSpursJob128 m_job;
	job_notify::NotifyArea_t m_notify;
	bool Test( class VjobChain4 *pJobChain );
} ALIGN128_POST;
#endif