文章目录[x]
- 1:使用gl_InstanceID 进行实例化
- 2:使用实例化数组绘制
- 3:小行星带
目前我们接触到的绘制方式,每绘制一个物体都会产生一次DrawCall(glDrawArrays/glDrawElements)。一旦物体数量过多,就会严重拖慢程序。当场景中需要大量重复绘制同一个模型时(例如草地、小行星带等),使用GPUInstance(GPU实例化)是一个不错的解决方案。
使用gl_InstanceID 进行实例化
View Code
class Basic : public SubTestBed
{
protected:
std::string m_ShaderPath = "SandBox/21_Instance/Basic.shader";
glm::vec2 translations[100];
public:
virtual void Setup() override
{
float quadVertices[] = {
// 位置 // 颜色
-0.05f, 0.05f, 1.0f, 0.0f, 0.0f,
0.05f, -0.05f, 0.0f, 1.0f, 0.0f,
-0.05f, -0.05f, 0.0f, 0.0f, 1.0f,
-0.05f, 0.05f, 1.0f, 0.0f, 0.0f,
0.05f, -0.05f, 0.0f, 1.0f, 0.0f,
0.05f, 0.05f, 0.0f, 1.0f, 1.0f
};
int index = 0;
float offset = 0.1f;
for (int y = -10; y < 10; y += 2)
{
for (int x = -10; x < 10; x += 2)
{
glm::vec2 translation;
translation.x = (float)x / 10.0f + offset;
translation.y = (float)y / 10.0f + offset;
translations[index++] = translation;
}
}
m_VA[0] = new VertexArray();
m_VB[0] = new VertexBuffer(quadVertices, sizeof(quadVertices));
VertexBufferLayout layout;
layout.Push<float>(2);
layout.Push<float>(3);
m_VA[0]->AddBuffer(*m_VB[0], layout);
m_Shader[0] = new Shader(m_ShaderPath);
m_ClearColor = glm::vec4(0.1f);
CreateFreeCamera();
}
virtual void Update(float dt) override
{
m_TestScene->Update(dt);
}
virtual void Render() override
{
m_Shader[0]->Bind();
for (unsigned int i = 0; i < 100; i++)
{
m_Shader[0]->SetVec2(String::Format("offsets[%d]", i), translations[i]);
}
glBindVertexArray(m_VA[0]->GetID());
glDrawArraysInstanced(GL_TRIANGLES, 0, 6, 100);
}
};
#Shader Vertex
#version 330 core
// Vertex stage: moves each quad vertex by the offset that belongs to the
// instance currently being drawn and forwards the vertex color.
// Attribute 0: 2D vertex position. Attribute 1: RGB vertex color.
layout(location = 0) in vec2 a_position;
layout(location = 1) in vec3 a_color;
// Color handed to the fragment stage.
out vec3 fColor;
// One translation per instance; the host code fills elements 0..99.
uniform vec2 offsets[100];
void main()
{
fColor = a_color;
// gl_InstanceID selects this instance's translation from the uniform array.
gl_Position = vec4(a_position + offsets[gl_InstanceID], 0.0, 1.0);
}
#Shader Fragment
#version 330
// Fragment stage: writes the interpolated vertex color with full opacity.
out vec4 fragColor;
in vec3 fColor;
void main()
{
fragColor = vec4(fColor, 1.0);
}
通过这种方式绘制,我们需要为其准备一个uniform数组。一旦要绘制的实例数量过多,就有可能超过uniform所能容纳数据的上限。
使用实例化数组绘制
View Code
class InstanceArray : public Basic
{
public:
virtual void Setup() override
{
float quadVertices[] = {
// 位置 // 颜色
-0.05f, 0.05f, 1.0f, 0.0f, 0.0f,
0.05f, -0.05f, 0.0f, 1.0f, 0.0f,
-0.05f, -0.05f, 0.0f, 0.0f, 1.0f,
-0.05f, 0.05f, 1.0f, 0.0f, 0.0f,
0.05f, -0.05f, 0.0f, 1.0f, 0.0f,
0.05f, 0.05f, 0.0f, 1.0f, 1.0f
};
int index = 0;
float offset = 0.1f;
for (int y = -10; y < 10; y += 2)
{
for (int x = -10; x < 10; x += 2)
{
glm::vec2 translation;
translation.x = (float)x / 10.0f + offset;
translation.y = (float)y / 10.0f + offset;
translations[index++] = translation;
}
}
m_VA[0] = new VertexArray();
m_VB[0] = new VertexBuffer(quadVertices, sizeof(quadVertices));
VertexBufferLayout layout1;
layout1.Push<float>(2);
layout1.Push<float>(3);
m_VA[0]->AddBuffer(*m_VB[0], layout1);
// 新生成一个VBO,用来装offsets
m_VB[1] = new VertexBuffer(&translations[0], sizeof(glm::vec2) * 100);
VertexBufferLayout layout2;
layout2.Push<float>(2);
m_VA[0]->AddBuffer(*m_VB[1], layout2);
// 它的第一个参数是顶点属性(就是顶点着色器中的layout(location=x)中的x),第二个参数是属性除数(Attribute Divisor)。
// 默认情况下,属性除数是0,告诉OpenGL我们需要在顶点着色器的每次迭代时更新顶点属性。
// 将它设置为1时,我们告诉OpenGL我们希望在渲染一个新实例的时候更新顶点属性。
// 设置为2时,我们希望每2个实例更新一次属性,以此类推。
// 我们将属性除数设置为1,是在告诉OpenGL,处于位置值2的顶点属性是一个实例化数组。
GLCall(glVertexAttribDivisor(2, 1));
m_ShaderPath = "SandBox/21_Instance/InstanceArray.shader";
m_Shader[0] = new Shader(m_ShaderPath);
m_ClearColor = glm::vec4(0.1f);
CreateFreeCamera();
}
virtual void Update(float dt) override
{
m_TestScene->Update(dt);
}
virtual void Render() override
{
m_Shader[0]->Bind();
glBindVertexArray(m_VA[0]->GetID());
glDrawArraysInstanced(GL_TRIANGLES, 0, 6, 100);
}
};
#Shader Vertex
#version 330 core
// Vertex stage: scales the quad by the instance index and moves it by the
// per-instance offset read from an instanced vertex attribute.
// Attribute 0: 2D vertex position. Attribute 1: RGB vertex color.
layout(location = 0) in vec2 a_position;
layout(location = 1) in vec3 a_color;
// Attribute 2: per-instance offset (the host code sets its attribute divisor to 1).
layout(location = 2) in vec2 a_offset;
// Color handed to the fragment stage.
out vec3 fColor;
void main()
{
fColor = a_color;
// Scale factor grows linearly with the instance index: 0.0 for instance 0,
// 0.99 for instance 99, so later instances are drawn larger.
vec2 pos = a_position * (gl_InstanceID / 100.0);
gl_Position = vec4(pos + a_offset, 0.0, 1.0);
}
#Shader Fragment
#version 330
// Fragment stage: writes the interpolated vertex color with full opacity.
out vec4 fragColor;
in vec3 fColor;
void main()
{
fragColor = vec4(fColor, 1.0);
}
帧率高达2471,惊呆了。
现在使用实例化数组绘制一个小行星带吧。为了更好的对比,一次绘制不使用GPUInstance,一次绘制使用GPUInstance。Let's go!
小行星带
使用GPUInstance绘制小行星带
class AsteroidBeltInstance : public SubTestBed
{
glm::mat4 *modelMatrices;
unsigned int amount = 200000;
public:
virtual void Setup() override
{
m_ClearColor = glm::vec4(0.1f);
CreateFreeCamera();
m_Model[0] = new Model("Resources/Model/planet/planet.obj");
m_Model[1] = new Model("Resources/Model/rock/rock.obj");
m_Shader[0] = new Shader("SandBox/21_Instance/Planet.shader");
m_Shader[1] = new Shader("SandBox/21_Instance/AsteriodInstance.shader");
// 生成1000个随机变换矩阵
modelMatrices = new glm::mat4[amount];
srand((unsigned int)glfwGetTime());
float radius = 50.0f;
float offset = 2.5f;
for (unsigned int i = 0; i < amount; i++)
{
radius = ((i / 5000) + 1) * 12.0f + 20.0f;
glm::mat4 model = glm::identity<glm::mat4>();
// 1.Translate,分布在半径为radius的圆形上,范围是[-offset, +offset]
float angle = (float)i / amount * 360.0f; // 把360度角均分为1000份
// rand() % m 表示产生 [0, m) 的随机数,所以下面代码会产生 [-offset, +offset] 的随机数
float displacement = (rand() % (int)(2 * offset * 100 + 1)) / 100.0f - offset;
float x = sin(angle) * radius + displacement; // 圆的参数方程
displacement = (rand() % (int)(2 * offset * 100)) / 100.0f - offset;
float y = displacement * 0.4f; // 让行星带的高度比x和z的宽度要小
displacement = (rand() % (int)(2 * offset * 100)) / 100.0f - offset;
float z = cos(angle) * radius + displacement; // 圆的参数方程
model = glm::translate(model, glm::vec3(x, y, z));
// 2. 缩放:在 0.05 和 0.25f 之间缩放
float scale = (rand() % 20) / 100.0f + 0.05f;
model = glm::scale(model, glm::vec3(scale));
// 3. 旋转:绕着一个(半)随机选择的旋转轴向量进行随机的旋转
float rotAngle = (float)(rand() % 360);
model = glm::rotate(model, rotAngle, glm::vec3(0.4f, 0.6f, 0.8f));
// 4. 添加到矩阵的数组中
modelMatrices[i] = model;
}
// 把上面生成的矩阵装到VBO里
unsigned int buffer;
glGenBuffers(1, &buffer);
glBindBuffer(GL_ARRAY_BUFFER, buffer);
glBufferData(GL_ARRAY_BUFFER, amount * sizeof(glm::mat4), &modelMatrices[0], GL_STATIC_DRAW);
// 设置 4 x vec4的矩阵
// |x,x,x,x| |x,x,x,x| |x,x,x,x|
// |x,x,x,x| |x,x,x,x| |x,x,x,x|
// |x,x,x,x| |x,x,x,x|...|x,x,x,x|
// |x,x,x,x| |x,x,x,x| |x,x,x,x|
// 上述矩阵在VBO中的排列为
// |x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x| |x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x|...|x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x|
// 所以步长为一个sizeof(mat4), 每行的数据offset分别为0 1 2 3倍sizeof(vec4)
GLsizei vec4Size = sizeof(glm::vec4);
for (unsigned int i = 0; i < m_Model[1]->meshes().size(); i++)
{
m_Model[1]->meshes()[i].GetVertexArray()->Bind();
glEnableVertexAttribArray(3); // 0 1 2已经被顶点位置,法线,纹理占用了。所以这里从3开始
glVertexAttribPointer(3, 4, GL_FLOAT, GL_FALSE, 4 * vec4Size, (void*)0);
glEnableVertexAttribArray(4);
glVertexAttribPointer(4, 4, GL_FLOAT, GL_FALSE, 4 * vec4Size, (void*)(vec4Size));
glEnableVertexAttribArray(5);
glVertexAttribPointer(5, 4, GL_FLOAT, GL_FALSE, 4 * vec4Size, (void*)(2 * vec4Size));
glEnableVertexAttribArray(6);
glVertexAttribPointer(6, 4, GL_FLOAT, GL_FALSE, 4 * vec4Size, (void*)(3 * vec4Size));
// 将3 4 5 6location视为一个实例化数据
glVertexAttribDivisor(3, 1);
glVertexAttribDivisor(4, 1);
glVertexAttribDivisor(5, 1);
glVertexAttribDivisor(6, 1);
glBindVertexArray(0);
}
}
virtual void Update(float dt) override
{
m_TestScene->Update(dt);
}
virtual void Render() override
{
// m_TestScene->Render();
m_Shader[0]->Bind();
glm::mat4 model = glm::identity<glm::mat4>();
model = glm::translate(model, glm::vec3(0.0f, -3.0f, 0.0f));
model = glm::scale(model, glm::vec3(4.0f, 4.0f, 4.0f));
m_Shader[0]->SetMat4f("model", model);
m_Shader[0]->SetMat4f("view", m_Camera->GetViewMatrix());
m_Shader[0]->SetMat4f("projection", m_Camera->GetProjectionMatrix());
m_Model[0]->Draw(m_Shader[0]);
m_Shader[1]->Bind();
m_Shader[1]->SetMat4f("view", m_Camera->GetViewMatrix());
m_Shader[1]->SetMat4f("projection", m_Camera->GetProjectionMatrix());
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, m_Model[1]->textures()[0].GetID());
m_Shader[1]->SetInt("material.texture_diffuse1", 0);
for (unsigned int i = 0; i < m_Model[1]->meshes().size(); i++)
{
m_Model[1]->meshes()[i].GetVertexArray()->Bind();
glDrawElementsInstanced(GL_TRIANGLES, m_Model[1]->meshes()[i].indices.size(), GL_UNSIGNED_INT, 0, amount);
}
}
};
#Shader Vertex
#version 330 core
// Vertex stage: transforms each rock vertex with the model matrix of the
// instance being drawn, then with the shared view and projection matrices.
// Attribute 0: vertex position. Attribute 2: texture coordinates.
layout(location = 0) in vec3 a_position;
layout(location = 2) in vec2 a_texCoord;
// Per-instance model matrix. A mat4 input occupies four consecutive locations
// (3, 4, 5, 6), matching the four vec4 attributes configured by the host code.
layout(location = 3) in mat4 a_InstanceModel;
// Texture coordinates handed to the fragment stage.
out vec2 TexCoords;
uniform mat4 view;
uniform mat4 projection;
void main()
{
TexCoords = a_texCoord;
gl_Position = projection * view * a_InstanceModel * vec4(a_position, 1.0);
}
#Shader Fragment
#version 330 core
// Fragment stage: samples the diffuse texture at the interpolated coordinates.
out vec4 fragColor;
struct Material
{
sampler2D texture_diffuse1;
};
// The host code sets material.texture_diffuse1 to texture unit 0.
uniform Material material;
in vec2 TexCoords;
void main()
{
fragColor = texture(material.texture_diffuse1, TexCoords);
}
本人机器AMD2700+RTX2060
前者未使用GPUInstance,绘制1000个小行星加一个大行星。共计1001次DrawCall。帧率低至26
后者使用GPUInstance,绘制20万个小行星加一个大行星。共计2次DrawCall(大行星和小行星带各一次)。帧率稳定在70以上。