Ogre C++ AMP

A place to show off your latest screenshots and for people to comment on them. Only start a new thread here if you have some nice images to show off!
Post Reply
Tannz0rz
Gnoblar
Posts: 5
Joined: Tue Oct 23, 2012 2:12 am

Ogre C++ AMP

Post by Tannz0rz »

Hey, it's me again, Ogre community! I return after making my previous thread about directly writing to the texture buffer in order to use it elsewhere in my game, in which I mentioned that I wouldn't bother directly messing with GPU stuff (OpenCL in particular). Well, that has changed with my discovery of C++ AMP, and I bring you my new-and-improved code! The bleed kernel itself runs at an average of 2ms on an 800x600 texture, which is a LARGE improvement over the CPU-run code.

Without further ado I give you a video of the new effect:
[youtube]HPS0G-QLS3Y[/youtube]

And the code:

BleedTexture.h

Code: Select all

#include "stdafx.h"

#ifndef BLEED_TEXTURE_H
#define BLEED_TEXTURE_H

#include <amp.h>
#include <utility>

using namespace concurrency;
using namespace Ogre;

// Packs four channel values that are already in the range 0..255 into one 32-bit ARGB value.
// Each channel is converted to unsigned before shifting so that an alpha of 128 or more is
// not shifted into the sign bit of a signed int.
#define BYTE_ARGB(a, r, g, b) ((static_cast<unsigned int>(a) << 24) | (static_cast<unsigned int>(r) << 16) | (static_cast<unsigned int>(g) << 8) | static_cast<unsigned int>(b))
// Same as BYTE_ARGB, but every channel is first masked to its low 8 bits.
#define WORD_ARGB(a, r, g, b) (((static_cast<unsigned int>(a) & 0xffu) << 24) | ((static_cast<unsigned int>(r) & 0xffu) << 16) | ((static_cast<unsigned int>(g) & 0xffu) << 8) | (static_cast<unsigned int>(b) & 0xffu))
// Channel extractors for a packed ARGB value. A() is masked like the others so that a signed
// argument with the top bit set still yields a value in 0..255.
#define A(c) (((c) >> 24) & 0xff)
#define R(c) (((c) >> 16) & 0xff)
#define G(c) (((c) >> 8) & 0xff)
#define B(c) ((c) & 0xff)
// True when (x, y) lies inside a width-by-height grid. Every argument is parenthesised so that
// expressions containing low-precedence operators are evaluated as a unit.
#define ValidTexCoord(x, y, width, height) ((x) >= 0 && (x) < (width) && (y) >= 0 && (y) < (height))

// Runs one "bleed" pass over `buffer` and replaces its contents with the result.
//
// For every texel the kernel visits the 3x3 neighbourhood (neighbours outside the texture are
// skipped) and tracks two things: the per-channel sum of the visited texels and one "extreme"
// texel. Each output channel is the mean of (neighbourhood average, extreme texel's channel).
//
//   buffer            - texels indexed as (row, column), packed as produced by WORD_ARGB.
//   lessthan          - true: the extreme texel is the smallest packed value seen;
//                       false: it is the largest. The comparison is on the whole packed
//                       32-bit value, so the alpha byte dominates the ordering.
//   const_a..const_b  - ~0 means "compute this channel as described above"; any other value
//                       is written to that channel unchanged (WORD_ARGB keeps its low 8 bits).
//
// Declared inline because this definition lives in a header; without it, including the header
// from more than one translation unit produces duplicate definitions at link time.
inline void BleedKernel(array<uint32, 2> &buffer, bool lessthan, uint32 const_a, uint32 const_r, uint32 const_g, uint32 const_b)
{
	// Results are written to a scratch array so that every texel reads unmodified input.
	array<uint32, 2>
		tmp(buffer.extent);
	
	parallel_for_each(buffer.extent, [=, &buffer, &tmp](index<2> idx) restrict(amp)
	{
		// (dx, dy) offsets of the 3x3 neighbourhood, centre included.
		const int32
			offset[9][2] =
			{
				{-1	, -1},
				{ 0	, -1},
				{ 1	, -1},
				{-1	, 0 },
				{ 0	, 0 },
				{ 1	, 0	},
				{-1	, 1 },
				{ 0	, 1	},
				{ 1	, 1	}
			};

		int32
			x, y;
		uint32
			color,
			// Start from the opposite end of the range so the first visited texel replaces it.
			extreme = (lessthan) ? (~0u) : (0u),
			a = 0, r = 0, g = 0, b = 0,	num = 0;

		for(uint32 i = 0; i < 9; ++i)
		{
			// idx[0] is the row (y) and idx[1] is the column (x).
			x = idx[1] + offset[i][0];
			y = idx[0] + offset[i][1];

			if(ValidTexCoord(x, y, buffer.extent[1], buffer.extent[0]))
			{
				color = buffer(y, x);

				a += A(color);
				r += R(color);
				g += G(color);
				b += B(color);

				num++;

				// lessthan == true keeps the minimum, lessthan == false keeps the maximum.
				if((color < extreme) == lessthan)
				{
					extreme = color;
				}
			}
		}

		// num is at least 1 here: the centre texel (offset 0, 0) is always inside the texture.
		a = (const_a == ~0u) ? (((a / num) + A(extreme)) / 2) : (const_a);
		r = (const_r == ~0u) ? (((r / num) + R(extreme)) / 2) : (const_r);
		g = (const_g == ~0u) ? (((g / num) + G(extreme)) / 2) : (const_g);
		b = (const_b == ~0u) ? (((b / num) + B(extreme)) / 2) : (const_b);

		tmp(idx) = WORD_ARGB(a, r, g, b);
	});
	
	// Hand the scratch storage over to `buffer` instead of copying every texel back.
	buffer = std::move(tmp);
}

// Fills every texel of `buffer` whose squared distance from column `x`, row `y` is strictly
// less than radius * radius with `color`; all other texels are left untouched.
//
// Declared inline because this definition lives in a header; without it, including the header
// from more than one translation unit produces duplicate definitions at link time.
inline void CircleKernel(array<uint32, 2> &buffer, uint32 x, uint32 y, uint32 radius, uint32 color)
{
	parallel_for_each(buffer.extent, [=, &buffer](index<2> idx) restrict(amp)
	{
		// idx[] is int while x and y are uint32, so the subtractions are carried out in
		// unsigned arithmetic. A "negative" difference wraps around, but its square is
		// still the correct value modulo 2^32.
		if((idx[1] - x) * (idx[1] - x) + (idx[0] - y) * (idx[0] - y) < radius * radius)
		{
			buffer(idx) = color;
		}
	});
}

// Assigns `val` to every element of `arr` with a C++ AMP parallel_for_each over arr.extent.
template<typename T, int32 Rank>
void Fill(array<T, Rank> &arr, T val)
{
	parallel_for_each(arr.extent, [=, &arr](index<Rank> cell) restrict(amp)
	{
		// One invocation per element; each writes only its own slot.
		arr(cell) = val;
	});
}

class BleedTexture
{
	static int32
		id;

	array<uint32, 2>
		buf;
	
public:
	const int32
		width, 
		height;

	TexturePtr
		textureptr;

	BleedTexture(const int32 width, const int32 height, uint32 bgcolor = 0) : 
		width(width),
		height(height),
		buf(extent<2>(height, width))
	{
		Fill<uint32, 2>(buf, bgcolor);
		
		textureptr = TextureManager::getSingleton().createManual("Bleed" + StringConverter::toString(++id), ResourceGroupManager::DEFAULT_RESOURCE_GROUP_NAME, TextureType::TEX_TYPE_2D, width, height, 0, PixelFormat::PF_A8R8G8B8);
	}

	~BleedTexture()
	{
	}

	void Bleed(bool lessthan = true, uint32 const_a = ~0, uint32 const_r = ~0, uint32 const_g = ~0, uint32 const_b = ~0)
	{
		BleedKernel(buf, lessthan, const_a, const_r, const_g, const_b);
	}

	void Circle(uint32 x, uint32 y, uint32 radius, uint32 color)
	{
		CircleKernel(buf, x, y, radius, color);
	}

	void CopyToBuffer()
	{
		HardwarePixelBufferSharedPtr 
			pixelbuffer = textureptr->getBuffer();

		copy(buf, stdext::make_checked_array_iterator<uint32 *>((uint32 *)pixelbuffer->lock(HardwareBuffer::HBL_DISCARD), width * height));

		pixelbuffer->unlock();
	}

	void Reset(uint32 bgcolor)
	{
		Fill<uint32, 2>(buf, bgcolor);
	}
};

int32 
	BleedTexture::id = 0;

#endif
Application.cpp

Code: Select all

// Scales rand() to a value in the inclusive range [0, i] (for a non-negative integer i, the
// upper bound is reached only when rand() returns RAND_MAX).
// The product (i) * rand() is formed in the operands' own type, so with int operands it can
// overflow when i * RAND_MAX exceeds INT_MAX.
#define rnd(i) ((i) * rand() / RAND_MAX)

void Application::createScene()
{
		bleedtexture = new BleedTexture(window->getWidth(), window->getHeight());

		MaterialPtr 
			BleedMaterial = MaterialManager::getSingleton().create("BleedMaterial", Ogre::ResourceGroupManager::DEFAULT_RESOURCE_GROUP_NAME);

		BleedMaterial->getTechnique(0)->getPass(0)->createTextureUnitState("Bleed")->setTexture(bleedtexture->textureptr);
 
		rect = new Rectangle2D(true);
		rect->setCorners(-1.0, 1.0, 1.0, -1.0);
		rect->setBoundingBox(AxisAlignedBox(-100000.0 * Vector3::UNIT_SCALE, 100000.0 * Vector3::UNIT_SCALE));
		rect->setMaterial("BleedMaterial");
 
		SceneNode
			*node = scenemgr->getRootSceneNode()->createChildSceneNode("BleedRect");

		node->attachObject(rect);
		node->setVisible(true);
}

bool Application::frameRenderingQueued(const FrameEvent& evt)
{
	uchar 
		random = uchar(rnd(64)) + 64;

	bleedtexture->Bleed(false);
	bleedtexture->Circle(rnd(bleedtexture->width), rnd(bleedtexture->height), rnd(50) + 25, BYTE_ARGB(255, random, random, random));
	bleedtexture->CopyToBuffer();

	return true;
}
The primary downside, I suppose, is the VC++2012 Redistributable system requirement. Despite that, I heavily advise everyone to check it out! The new features of VC++2012 are awesome, not to mention C++0x/C++11!

Anyways, thanks Ogre team for the awesome engine and thanks Transporter for your up-to-date snapshots.

-Tannz0rz
elgregor
Gnoblar
Posts: 18
Joined: Wed Feb 23, 2011 5:02 pm
Location: Poland

Re: Ogre C++ AMP

Post by elgregor »

You might want to know that C++ AMP support is being implemented in LLVM using OpenCL. Link: http://www.phoronix.com/scan.php?page=n ... px=MTIyOTk
Sorry for my English :)
Pulas
Halfling
Posts: 61
Joined: Sat Oct 29, 2011 9:39 am

Re: Ogre C++ AMP

Post by Pulas »

elgregor wrote:You might want to know that C++ AMP support is being implemented in LLVM using OpenCL. Link: http://www.phoronix.com/scan.php?page=n ... px=MTIyOTk
Wow, great.

Microsoft: C++ AMP
NVIDIA: OpenACC, Thrust

Which one is better?
Post Reply