aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorRay <raysan5@gmail.com>2014-12-31 19:28:38 +0100
committerRay <raysan5@gmail.com>2014-12-31 19:28:38 +0100
commit3c4a91658e8586ecb5504d3167796d043e79d065 (patch)
tree89aeddb683c133c6d94f1a03f9d1a1399d2a6bf4 /src
parent0c606092689d7a1823f7acd5e8b849052e297b7a (diff)
parentfad81f36e4cfd37901caad8555c49c41ac65aaee (diff)
downloadraylib-3c4a91658e8586ecb5504d3167796d043e79d065.tar.gz
raylib-3c4a91658e8586ecb5504d3167796d043e79d065.zip
Merge pull request #12 from raysan5/develop
Integration from develop branch
Diffstat (limited to 'src')
-rw-r--r--src/audio.c106
-rw-r--r--src/core.c315
-rw-r--r--src/makefile48
-rw-r--r--src/models.c93
-rw-r--r--src/raylib.h33
-rw-r--r--src/raymath.c49
-rw-r--r--src/raymath.h8
-rw-r--r--src/rlgl.c70
-rw-r--r--src/rlgl.h7
-rw-r--r--src/shapes.c4
-rw-r--r--src/stb_image.h2810
-rw-r--r--src/text.c180
-rw-r--r--src/textures.c173
-rw-r--r--src/utils.c46
-rw-r--r--src/utils.h1
15 files changed, 3026 insertions, 917 deletions
diff --git a/src/audio.c b/src/audio.c
index fd482534..40c24895 100644
--- a/src/audio.c
+++ b/src/audio.c
@@ -45,9 +45,14 @@
// Defines and Macros
//----------------------------------------------------------------------------------
#define MUSIC_STREAM_BUFFERS 2
-#define MUSIC_BUFFER_SIZE 4096*2 // PCM data buffer (short) - 16Kb
- // NOTE: Reduced to avoid frame-stalls on RPI
-//#define MUSIC_BUFFER_SIZE 4096*8 // PCM data buffer (short) - 64Kb
+
+#if defined(PLATFORM_RPI)
+ // NOTE: On RPI should be lower to avoid frame-stalls
+ #define MUSIC_BUFFER_SIZE 4096*2 // PCM data buffer (short) - 16Kb (RPI)
+#else
+ // NOTE: On HTML5 (emscripten) this is allocated on heap, by default it's only 16MB!...just take care...
+ #define MUSIC_BUFFER_SIZE 4096*8 // PCM data buffer (short) - 64Kb
+#endif
//----------------------------------------------------------------------------------
// Types and Structures Definition
@@ -69,15 +74,6 @@ typedef struct Music {
} Music;
-// Wave file data
-typedef struct Wave {
- void *data; // Buffer data pointer
- unsigned int dataSize; // Data size in bytes
- unsigned int sampleRate;
- short bitsPerSample;
- short channels;
-} Wave;
-
//----------------------------------------------------------------------------------
// Global Variables Definition
//----------------------------------------------------------------------------------
@@ -106,7 +102,7 @@ void InitAudioDevice(void)
// Open and initialize a device with default settings
ALCdevice *device = alcOpenDevice(NULL);
- if(!device) TraceLog(ERROR, "Could not open audio device");
+ if(!device) TraceLog(ERROR, "Audio device could not be opened");
ALCcontext *context = alcCreateContext(device, NULL);
@@ -205,12 +201,64 @@ Sound LoadSound(char *fileName)
// Attach sound buffer to source
alSourcei(source, AL_BUFFER, buffer);
+
+ TraceLog(INFO, "[%s] Sound file loaded successfully (SampleRate: %i, BitRate: %i, Channels: %i)", fileName, wave.sampleRate, wave.bitsPerSample, wave.channels);
+
+ // Unallocate WAV data
+ UnloadWave(wave);
+
+ sound.source = source;
+ sound.buffer = buffer;
+ }
+
+ return sound;
+}
+
+// Load sound from wave data
+Sound LoadSoundFromWave(Wave wave)
+{
+ Sound sound;
+
+ if (wave.data != NULL)
+ {
+ ALenum format = 0;
+ // The OpenAL format is worked out by looking at the number of channels and the bits per sample
+ if (wave.channels == 1)
+ {
+ if (wave.bitsPerSample == 8 ) format = AL_FORMAT_MONO8;
+ else if (wave.bitsPerSample == 16) format = AL_FORMAT_MONO16;
+ }
+ else if (wave.channels == 2)
+ {
+ if (wave.bitsPerSample == 8 ) format = AL_FORMAT_STEREO8;
+ else if (wave.bitsPerSample == 16) format = AL_FORMAT_STEREO16;
+ }
+
+ // Create an audio source
+ ALuint source;
+ alGenSources(1, &source); // Generate pointer to audio source
+
+ alSourcef(source, AL_PITCH, 1);
+ alSourcef(source, AL_GAIN, 1);
+ alSource3f(source, AL_POSITION, 0, 0, 0);
+ alSource3f(source, AL_VELOCITY, 0, 0, 0);
+ alSourcei(source, AL_LOOPING, AL_FALSE);
+
+ // Convert loaded data to OpenAL buffer
+ //----------------------------------------
+ ALuint buffer;
+ alGenBuffers(1, &buffer); // Generate pointer to buffer
+
+ // Upload sound data to buffer
+ alBufferData(buffer, format, wave.data, wave.dataSize, wave.sampleRate);
+
+ // Attach sound buffer to source
+ alSourcei(source, AL_BUFFER, buffer);
// Unallocate WAV data
UnloadWave(wave);
- TraceLog(INFO, "[%s] Sound file loaded successfully", fileName);
- TraceLog(INFO, "[%s] Sample rate: %i - Channels: %i", fileName, wave.sampleRate, wave.channels);
+ TraceLog(INFO, "[Wave] Sound file loaded successfully (SampleRate: %i, BitRate: %i, Channels: %i)", wave.sampleRate, wave.bitsPerSample, wave.channels);
sound.source = source;
sound.buffer = buffer;
@@ -235,7 +283,10 @@ Sound LoadSoundFromRES(const char *rresName, int resId)
FILE *rresFile = fopen(rresName, "rb");
- if (!rresFile) TraceLog(WARNING, "[%s] Could not open raylib resource file", rresName);
+ if (rresFile == NULL)
+ {
+ TraceLog(WARNING, "[%s] rRES raylib resource file could not be opened", rresName);
+ }
else
{
// Read rres file (basic file check - id)
@@ -327,12 +378,12 @@ Sound LoadSoundFromRES(const char *rresName, int resId)
// Attach sound buffer to source
alSourcei(source, AL_BUFFER, buffer);
+
+ TraceLog(INFO, "[%s] Sound loaded successfully from resource (SampleRate: %i, BitRate: %i, Channels: %i)", rresName, wave.sampleRate, wave.bitsPerSample, wave.channels);
// Unallocate WAV data
UnloadWave(wave);
- TraceLog(INFO, "[%s] Sound loaded successfully from resource, sample rate: %i", rresName, (int)sampleRate);
-
sound.source = source;
sound.buffer = buffer;
}
@@ -447,7 +498,10 @@ void PlayMusicStream(char *fileName)
// Open audio stream
currentMusic.stream = stb_vorbis_open_filename(fileName, NULL, NULL);
- if (currentMusic.stream == NULL) TraceLog(WARNING, "[%s] Could not open ogg audio file", fileName);
+ if (currentMusic.stream == NULL)
+ {
+ TraceLog(WARNING, "[%s] OGG audio file could not be opened", fileName);
+ }
else
{
// Get file info
@@ -537,11 +591,13 @@ void ResumeMusicStream(void)
// Check if music is playing
bool MusicIsPlaying(void)
{
- ALenum state;
+ bool playing = false;
+ ALint state;
alGetSourcei(currentMusic.source, AL_SOURCE_STATE, &state);
+ if (state == AL_PLAYING) playing = true;
- return (state == AL_PLAYING);
+ return playing;
}
// Set volume for music
@@ -712,9 +768,9 @@ static Wave LoadWAV(const char *fileName)
wavFile = fopen(fileName, "rb");
- if (!wavFile)
+ if (wavFile == NULL)
{
- TraceLog(WARNING, "[%s] Could not open WAV file", fileName);
+ TraceLog(WARNING, "[%s] WAV file could not be opened", fileName);
}
else
{
@@ -766,7 +822,7 @@ static Wave LoadWAV(const char *fileName)
wave.channels = waveFormat.numChannels;
wave.bitsPerSample = waveFormat.bitsPerSample;
- TraceLog(INFO, "[%s] Wave file loaded successfully", fileName);
+ TraceLog(INFO, "[%s] WAV file loaded successfully (SampleRate: %i, BitRate: %i, Channels: %i)", fileName, wave.sampleRate, wave.bitsPerSample, wave.channels);
}
}
}
@@ -815,6 +871,8 @@ static Wave LoadOGG(char *fileName)
int samplesObtained = stb_vorbis_get_samples_short_interleaved(oggFile, info.channels, wave.data, totalSamplesLength);
TraceLog(DEBUG, "[%s] Samples obtained: %i", fileName, samplesObtained);
+
+ TraceLog(INFO, "[%s] OGG file loaded successfully (SampleRate: %i, BitRate: %i, Channels: %i)", fileName, wave.sampleRate, wave.bitsPerSample, wave.channels);
stb_vorbis_close(oggFile);
diff --git a/src/core.c b/src/core.c
index dec08eeb..5ff44f89 100644
--- a/src/core.c
+++ b/src/core.c
@@ -8,6 +8,7 @@
* PLATFORM_DESKTOP - Windows, Linux, Mac (OSX)
* PLATFORM_ANDROID - Only OpenGL ES 2.0 devices
* PLATFORM_RPI - Rapsberry Pi (tested on Raspbian)
+* PLATFORM_WEB - Emscripten, HTML5
*
* On PLATFORM_DESKTOP, the external lib GLFW3 (www.glfw.com) is used to manage graphic
* device, OpenGL context and input on multiple operating systems (Windows, Linux, OSX).
@@ -49,7 +50,7 @@
#include <string.h> // String function definitions, memset()
#include <errno.h> // Macros for reporting and retrieving error conditions through error codes
-#if defined(PLATFORM_DESKTOP)
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_WEB)
#include <GLFW/glfw3.h> // GLFW3 library: Windows, OpenGL context and Input management
//#include <GL/gl.h> // OpenGL functions (GLFW3 already includes gl.h)
//#define GLFW_DLL // Using GLFW DLL on Windows -> No, we use static version!
@@ -91,7 +92,7 @@
//----------------------------------------------------------------------------------
// Defines and Macros
//----------------------------------------------------------------------------------
-// ...
+#define MAX_TOUCH_POINTS 256
//----------------------------------------------------------------------------------
// Types and Structures Definition
@@ -101,7 +102,7 @@
//----------------------------------------------------------------------------------
// Global Variables Definition
//----------------------------------------------------------------------------------
-#if defined(PLATFORM_DESKTOP)
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_WEB)
static GLFWwindow *window; // Native window (graphic device)
#elif defined(PLATFORM_ANDROID)
static struct android_app *app; // Android activity
@@ -111,7 +112,14 @@ static bool windowReady = false; // Used to detect display initia
// Gestures detection variables
static float tapTouchX, tapTouchY;
+static int64_t lastTapTime = 0;
+static float lastTapX = 0, lastTapY = 0;
static bool touchTap = false;
+static bool doubleTap = false;
+static bool drag = false;
+static int stdVector[MAX_TOUCH_POINTS];
+static int indexPosition = 0;
+const AInputEvent* eventDrag;
static int32_t touchId;
const int32_t DOUBLE_TAP_TIMEOUT = 300*1000000;
const int32_t DOUBLE_TAP_SLOP = 100;
@@ -160,12 +168,12 @@ static int renderOffsetY = 0; // Offset Y from render area (must b
static bool fullscreen = false; // Fullscreen mode (useful only for PLATFORM_DESKTOP)
static Matrix downscaleView; // Matrix to downscale view (in case screen size bigger than display size)
-#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_RPI)
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_RPI) || defined(PLATFORM_WEB)
static const char *windowTitle; // Window text title...
static char configFlags = 0;
static bool customCursor = false; // Tracks if custom cursor has been set
-static bool cursorOnScreen = true; // Tracks if cursor is inside client area
+static bool cursorOnScreen = false; // Tracks if cursor is inside client area
static Texture2D cursor; // Cursor texture
static Vector2 mousePosition;
@@ -183,6 +191,7 @@ static int previousMouseWheelY = 0; // Required to track mouse wheel var
static int currentMouseWheelY = 0; // Required to track mouse wheel variation
static int exitKey = KEY_ESCAPE; // Default exit key (ESC)
+static int lastKeyPressed = -1;
#endif
#if defined(PLATFORM_ANDROID)
@@ -226,12 +235,17 @@ static void RestoreKeyboard(void); // Restore keyboard syst
static void InitGamepad(void); // Init raw gamepad input
#endif
-#if defined(PLATFORM_DESKTOP)
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_WEB)
static void ErrorCallback(int error, const char *description); // GLFW3 Error Callback, runs on GLFW3 error
static void KeyCallback(GLFWwindow *window, int key, int scancode, int action, int mods); // GLFW3 Keyboard Callback, runs on key pressed
+static void MouseButtonCallback(GLFWwindow *window, int button, int action, int mods); // GLFW3 Mouse Button Callback, runs on mouse button pressed
+static void CharCallback(GLFWwindow *window, unsigned int key); // GLFW3 Char Key Callback, runs on key pressed (get char value)
static void ScrollCallback(GLFWwindow *window, double xoffset, double yoffset); // GLFW3 Srolling Callback, runs on mouse wheel
static void CursorEnterCallback(GLFWwindow *window, int enter); // GLFW3 Cursor Enter Callback, cursor enters client area
static void WindowSizeCallback(GLFWwindow *window, int width, int height); // GLFW3 WindowSize Callback, runs when window is resized
+#endif
+
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_RPI)
static void TakeScreenshot(void); // Takes a screenshot and saves it in the same folder as executable
#endif
@@ -243,7 +257,7 @@ static void CommandCallback(struct android_app *app, int32_t cmd); //
//----------------------------------------------------------------------------------
// Module Functions Definition - Window and OpenGL Context Functions
//----------------------------------------------------------------------------------
-#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_RPI)
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_RPI) || defined(PLATFORM_WEB)
// Initialize Window and Graphics Context (OpenGL)
void InitWindow(int width, int height, const char *title)
{
@@ -348,7 +362,7 @@ void CloseWindow(void)
rlglClose(); // De-init rlgl
-#if defined(PLATFORM_DESKTOP)
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_WEB)
glfwDestroyWindow(window);
glfwTerminate();
#elif defined(PLATFORM_ANDROID) || defined(PLATFORM_RPI)
@@ -380,7 +394,7 @@ void CloseWindow(void)
// Detect if KEY_ESCAPE pressed or Close icon pressed
bool WindowShouldClose(void)
{
-#if defined(PLATFORM_DESKTOP)
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_WEB)
return (glfwWindowShouldClose(window));
#elif defined(PLATFORM_ANDROID) || defined(PLATFORM_RPI)
return windowShouldClose;
@@ -402,7 +416,7 @@ void ToggleFullscreen(void)
#endif
}
-#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_RPI)
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_RPI) || defined(PLATFORM_WEB)
// Set a custom cursor icon/image
void SetCustomCursor(const char *cursorImage)
{
@@ -411,6 +425,7 @@ void SetCustomCursor(const char *cursorImage)
cursor = LoadTexture(cursorImage);
#if defined(PLATFORM_DESKTOP)
+ // NOTE: emscripten not implemented
glfwSetInputMode(window, GLFW_CURSOR, GLFW_CURSOR_HIDDEN);
#endif
customCursor = true;
@@ -436,6 +451,12 @@ int GetScreenHeight(void)
return screenHeight;
}
+// Get the last key pressed
+int GetKeyPressed(void)
+{
+ return lastKeyPressed;
+}
+
// Sets Background Color
void ClearBackground(Color color)
{
@@ -612,19 +633,13 @@ void ShowLogo(void)
//----------------------------------------------------------------------------------
// Module Functions Definition - Input (Keyboard, Mouse, Gamepad) Functions
//----------------------------------------------------------------------------------
-#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_RPI)
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_RPI) || defined(PLATFORM_WEB)
// Detect if a key has been pressed once
bool IsKeyPressed(int key)
{
bool pressed = false;
- currentKeyState[key] = IsKeyDown(key);
-
- if (currentKeyState[key] != previousKeyState[key])
- {
- if (currentKeyState[key]) pressed = true;
- previousKeyState[key] = currentKeyState[key];
- }
+ if ((currentKeyState[key] != previousKeyState[key]) && (currentKeyState[key] == 1)) pressed = true;
else pressed = false;
return pressed;
@@ -642,13 +657,7 @@ bool IsKeyReleased(int key)
{
bool released = false;
- currentKeyState[key] = IsKeyUp(key);
-
- if (currentKeyState[key] != previousKeyState[key])
- {
- if (currentKeyState[key]) released = true;
- previousKeyState[key] = currentKeyState[key];
- }
+ if ((currentKeyState[key] != previousKeyState[key]) && (currentKeyState[key] == 0)) released = true;
else released = false;
return released;
@@ -666,13 +675,7 @@ bool IsMouseButtonPressed(int button)
{
bool pressed = false;
- currentMouseState[button] = IsMouseButtonDown(button);
-
- if (currentMouseState[button] != previousMouseState[button])
- {
- if (currentMouseState[button]) pressed = true;
- previousMouseState[button] = currentMouseState[button];
- }
+ if ((currentMouseState[button] != previousMouseState[button]) && (currentMouseState[button] == 1)) pressed = true;
else pressed = false;
return pressed;
@@ -690,13 +693,7 @@ bool IsMouseButtonReleased(int button)
{
bool released = false;
- currentMouseState[button] = IsMouseButtonUp(button);
-
- if (currentMouseState[button] != previousMouseState[button])
- {
- if (currentMouseState[button]) released = true;
- previousMouseState[button] = currentMouseState[button];
- }
+ if ((currentMouseState[button] != previousMouseState[button]) && (currentMouseState[button] == 0)) released = true;
else released = false;
return released;
@@ -727,6 +724,16 @@ Vector2 GetMousePosition(void)
return mousePosition;
}
+// Set mouse position XY
+void SetMousePosition(Vector2 position)
+{
+ mousePosition = position;
+#if defined(PLATFORM_DESKTOP)
+ // NOTE: emscripten not implemented
+ glfwSetCursorPos(window, position.x, position.y);
+#endif
+}
+
// Returns mouse wheel movement Y
int GetMouseWheelMove(void)
{
@@ -738,7 +745,8 @@ int GetMouseWheelMove(void)
}
#endif
-// TODO: Enable gamepad usage on Rapsberr Pi
+// TODO: Enable gamepad usage on Rapsberry Pi
+// NOTE: emscripten not implemented
#if defined(PLATFORM_DESKTOP)
// Detect if a gamepad is available
bool IsGamepadAvailable(int gamepad)
@@ -842,6 +850,18 @@ bool IsScreenTouched(void)
return touchTap;
}
+bool IsDoubleTap(void)
+{
+ if (doubleTap) TraceLog(INFO, "DOUBLE TAP gesture detected");
+
+ return doubleTap;
+}
+
+bool IsDragGesture(void)
+{
+ return drag;
+}
+
// Returns touch position X
int GetTouchX(void)
{
@@ -861,6 +881,27 @@ Vector2 GetTouchPosition(void)
return position;
}
+
+/*bool GetPointer(Vector2 *dragPositions)
+{
+ //static int stdVector[MAX_TOUCH_POINTS];
+ //static int indexPosition = 0;
+ //if (indexPosition == 0) return false;
+ Vector2 vec_pointers_[];
+
+ //eventDrag
+ int32_t iIndex = FindIndex( eventDrag, vec_pointers_[0] );
+
+ if (iIndex == -1) return false;
+
+ float x = AMotionEvent_getX(eventDrag, iIndex);
+ float y = AMotionEvent_getY(eventDrag, iIndex);
+
+ *dragPositions = Vector2( x, y );
+
+
+ return true;
+}*/
#endif
//----------------------------------------------------------------------------------
@@ -881,11 +922,13 @@ static void InitDisplay(int width, int height)
// Downscale matrix is required in case desired screen area is bigger than display area
downscaleView = MatrixIdentity();
-#if defined(PLATFORM_DESKTOP)
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_WEB)
glfwSetErrorCallback(ErrorCallback);
if (!glfwInit()) TraceLog(ERROR, "Failed to initialize GLFW");
+ // NOTE: Getting video modes is not implemented in emscripten GLFW3 version
+#if defined(PLATFORM_DESKTOP)
// Find monitor resolution
const GLFWvidmode *mode = glfwGetVideoMode(glfwGetPrimaryMonitor());
@@ -895,7 +938,11 @@ static void InitDisplay(int width, int height)
// Screen size security check
if (screenWidth <= 0) screenWidth = displayWidth;
if (screenHeight <= 0) screenHeight = displayHeight;
-
+#elif defined(PLATFORM_WEB)
+ displayWidth = screenWidth;
+ displayHeight = screenHeight;
+#endif
+
glfwDefaultWindowHints(); // Set default windows hints
glfwWindowHint(GLFW_RESIZABLE, GL_FALSE); // Avoid window being resizable
@@ -944,7 +991,9 @@ static void InitDisplay(int width, int height)
else
{
TraceLog(INFO, "Display device initialized successfully");
+#if defined(PLATFORM_DESKTOP)
TraceLog(INFO, "Display size: %i x %i", displayWidth, displayHeight);
+#endif
TraceLog(INFO, "Render size: %i x %i", renderWidth, renderHeight);
TraceLog(INFO, "Screen size: %i x %i", screenWidth, screenHeight);
TraceLog(INFO, "Viewport offsets: %i, %i", renderOffsetX, renderOffsetY);
@@ -953,6 +1002,8 @@ static void InitDisplay(int width, int height)
glfwSetWindowSizeCallback(window, WindowSizeCallback);
glfwSetCursorEnterCallback(window, CursorEnterCallback);
glfwSetKeyCallback(window, KeyCallback);
+ glfwSetMouseButtonCallback(window, MouseButtonCallback);
+ glfwSetCharCallback(window, CharCallback);
glfwSetScrollCallback(window, ScrollCallback);
glfwMakeContextCurrent(window);
@@ -1116,7 +1167,7 @@ void InitGraphics(void)
#endif
}
-#if defined(PLATFORM_DESKTOP)
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_WEB)
// GLFW3 Error Callback, runs on GLFW3 error
static void ErrorCallback(int error, const char *description)
{
@@ -1138,10 +1189,31 @@ static void KeyCallback(GLFWwindow *window, int key, int scancode, int action, i
// NOTE: Before closing window, while loop must be left!
}
+#if defined(PLATFORM_DESKTOP)
else if (key == GLFW_KEY_F12 && action == GLFW_PRESS)
{
TakeScreenshot();
}
+#endif
+ else currentKeyState[key] = action;
+
+ // HACK for GuiTextBox, to deteck back key
+ // TODO: Review...
+ if ((key == 259) && (action == GLFW_PRESS)) lastKeyPressed = 3;
+}
+
+// GLFW3 Mouse Button Callback, runs on mouse button pressed
+static void MouseButtonCallback(GLFWwindow *window, int button, int action, int mods)
+{
+ currentMouseState[button] = action;
+}
+
+// GLFW3 Char Key Callback, runs on key pressed (get char value)
+static void CharCallback(GLFWwindow *window, unsigned int key)
+{
+ lastKeyPressed = key;
+
+ //TraceLog(INFO, "Char Callback Key pressed: %i\n", key);
}
// GLFW3 CursorEnter Callback, when cursor enters the window
@@ -1177,6 +1249,7 @@ static int32_t InputCallback(struct android_app *app, AInputEvent *event)
if (type == AINPUT_EVENT_TYPE_MOTION)
{
+ // Detect TOUCH position
if ((screenWidth > displayWidth) || (screenHeight > displayHeight))
{
// TODO: Seems to work ok but... review!
@@ -1240,7 +1313,124 @@ static int32_t InputCallback(struct android_app *app, AInputEvent *event)
//size_t pointerCount = AMotionEvent_getPointerCount(event);
//float AMotionEvent_getPressure(const AInputEvent *motion_event, size_t pointer_index); // 0 to 1
//float AMotionEvent_getSize(const AInputEvent *motion_event, size_t pointer_index); // Pressed area
+
+ // Detect DOUBLE TAP event
+ bool tapDetected = touchTap;
+
+ switch (flags)
+ {
+ case AMOTION_EVENT_ACTION_DOWN:
+ {
+ int64_t eventTime = AMotionEvent_getEventTime(event);
+
+ if (eventTime - lastTapTime <= DOUBLE_TAP_TIMEOUT)
+ {
+ float x = AMotionEvent_getX(event, 0) - lastTapX;
+ float y = AMotionEvent_getY(event, 0) - lastTapY;
+
+ float densityFactor = 1.0f;
+
+ if ((x*x + y*y) < (DOUBLE_TAP_SLOP*DOUBLE_TAP_SLOP*densityFactor))
+ {
+ // Doubletap detected
+ doubleTap = true;
+
+ }
+ }
+ } break;
+ case AMOTION_EVENT_ACTION_UP:
+ {
+ if (tapDetected)
+ {
+ lastTapTime = AMotionEvent_getEventTime(event);
+ lastTapX = AMotionEvent_getX(event, 0);
+ lastTapY = AMotionEvent_getY(event, 0);
+
+ }
+ } break;
+ }
+
+
+ // Detect DRAG event
+ //int32_t action = AMotionEvent_getAction(event);
+
+ int32_t index = (action & AMOTION_EVENT_ACTION_POINTER_INDEX_MASK) >> AMOTION_EVENT_ACTION_POINTER_INDEX_SHIFT;
+ //uint32_t flags = action & AMOTION_EVENT_ACTION_MASK;
+ //event_ = event;
+ int32_t count = AMotionEvent_getPointerCount(event);
+
+ switch (flags)
+ {
+ case AMOTION_EVENT_ACTION_DOWN:
+ {
+ stdVector[indexPosition] = AMotionEvent_getPointerId(event, 0);
+ indexPosition++;
+ TraceLog(INFO, "ACTION_DOWN");
+
+ //ret = GESTURE_STATE_START;
+ } break;
+ case AMOTION_EVENT_ACTION_POINTER_DOWN:
+ {
+ stdVector[indexPosition] = AMotionEvent_getPointerId(event, index);
+ indexPosition++;
+ TraceLog(INFO, "ACTION_POINTER_DOWN");
+
+ } break;
+ case AMOTION_EVENT_ACTION_UP:
+ {
+ //int value = stdVector[indexPosition];
+ indexPosition--;
+ //ret = GESTURE_STATE_END;
+ TraceLog(INFO, "ACTION_UP");
+
+ } break;
+ case AMOTION_EVENT_ACTION_POINTER_UP:
+ {
+ int32_t releasedPointerId = AMotionEvent_getPointerId(event, index);
+
+ int i = 0;
+ for (i = 0; i < MAX_TOUCH_POINTS; i++)
+ {
+ if (stdVector[i] == releasedPointerId)
+ {
+ for (int k = i; k < indexPosition - 1; k++)
+ {
+ stdVector[k] = stdVector[k + 1];
+ }
+
+ //indexPosition--;
+ indexPosition = 0;
+ break;
+ }
+ }
+
+ if (i <= 1)
+ {
+ // Reset pinch or drag
+ //if (count == 2) //ret = GESTURE_STATE_START;
+ }
+ TraceLog(INFO, "ACTION_POINTER_UP");
+
+ } break;
+ case AMOTION_EVENT_ACTION_MOVE:
+ {
+ if (count == 1)
+ {
+ //TraceLog(INFO, "DRAG gesture detected");
+
+ drag = true; //ret = GESTURE_STATE_MOVE;
+ }
+ else break;
+ TraceLog(INFO, "ACTION_MOVE");
+
+ } break;
+ case AMOTION_EVENT_ACTION_CANCEL: break;
+ default: break;
+ }
+
+ //--------------------------------------------------------------------
+
return 1;
}
else if (type == AINPUT_EVENT_TYPE_KEY)
@@ -1386,7 +1576,7 @@ static void InitTimer(void)
// Get current time measure since InitTimer()
static double GetTime(void)
{
-#if defined(PLATFORM_DESKTOP)
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_WEB)
return glfwGetTime();
#elif defined(PLATFORM_ANDROID) || defined(PLATFORM_RPI)
struct timespec ts;
@@ -1400,7 +1590,7 @@ static double GetTime(void)
// Get one key state
static bool GetKeyStatus(int key)
{
-#if defined(PLATFORM_DESKTOP)
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_WEB)
return glfwGetKey(window, key);
#elif defined(PLATFORM_ANDROID)
// TODO: Check virtual keyboard (?)
@@ -1415,7 +1605,7 @@ static bool GetKeyStatus(int key)
// Get one mouse button state
static bool GetMouseButtonStatus(int button)
{
-#if defined(PLATFORM_DESKTOP)
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_WEB)
return glfwGetMouseButton(window, button);
#elif defined(PLATFORM_ANDROID)
// TODO: Check virtual keyboard (?)
@@ -1429,7 +1619,7 @@ static bool GetMouseButtonStatus(int button)
// Poll (store) all input events
static void PollInputEvents(void)
{
-#if defined(PLATFORM_DESKTOP)
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_WEB)
// Mouse input polling
double mouseX;
double mouseY;
@@ -1441,14 +1631,23 @@ static void PollInputEvents(void)
// Keyboard polling
// Automatically managed by GLFW3 through callback
-
+ lastKeyPressed = -1;
+
+ // Register previous keys states
+ for (int i = 0; i < 512; i++) previousKeyState[i] = currentKeyState[i];
+
+ // Register previous mouse states
+ for (int i = 0; i < 3; i++) previousMouseState[i] = currentMouseState[i];
+
glfwPollEvents(); // Register keyboard/mouse events
#elif defined(PLATFORM_ANDROID)
// TODO: Check virtual keyboard (?)
- // Reset touchTap event
+ // Reset touch events
touchTap = false;
+ doubleTap = false;
+ drag = false;
// Poll Events (registered events)
while ((ident = ALooper_pollAll(0, NULL, &events,(void**)&source)) >= 0)
@@ -1597,14 +1796,17 @@ static void PollInputEvents(void)
static void InitMouse(void)
{
// NOTE: We can use /dev/input/mice to read from all available mice
- if ((mouseStream = open(DEFAULT_MOUSE_DEV, O_RDONLY|O_NONBLOCK)) < 0) TraceLog(WARNING, "Could not open mouse device, no mouse available");
+ if ((mouseStream = open(DEFAULT_MOUSE_DEV, O_RDONLY|O_NONBLOCK)) < 0)
+ {
+ TraceLog(WARNING, "Mouse device could not be opened, no mouse available");
+ }
else
{
mouseReady = true;
- int err = pthread_create(&mouseThreadId, NULL, &MouseThread, NULL);
+ int error = pthread_create(&mouseThreadId, NULL, &MouseThread, NULL);
- if (err != 0) TraceLog(WARNING, "Error creating mouse input event thread");
+ if (error != 0) TraceLog(WARNING, "Error creating mouse input event thread");
else TraceLog(INFO, "Mouse device initialized successfully");
}
}
@@ -1715,7 +1917,7 @@ static void RestoreKeyboard(void)
static void InitGamepad(void)
{
// TODO: Gamepad support
- if ((gamepadStream = open(DEFAULT_GAMEPAD_DEV, O_RDONLY|O_NONBLOCK)) < 0) TraceLog(WARNING, "Could not open gamepad device, no gamepad available");
+ if ((gamepadStream = open(DEFAULT_GAMEPAD_DEV, O_RDONLY|O_NONBLOCK)) < 0) TraceLog(WARNING, "Gamepad device could not be opened, no gamepad available");
else TraceLog(INFO, "Gamepad device initialized successfully");
}
#endif
@@ -1723,7 +1925,7 @@ static void InitGamepad(void)
// Copy back buffer to front buffers
static void SwapBuffers(void)
{
-#if defined(PLATFORM_DESKTOP)
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_WEB)
glfwSwapBuffers(window);
#elif defined(PLATFORM_ANDROID) || defined(PLATFORM_RPI)
eglSwapBuffers(display, surface);
@@ -1737,7 +1939,7 @@ static void SetupFramebufferSize(int displayWidth, int displayHeight)
// Calculate renderWidth and renderHeight, we have the display size (input params) and the desired screen size (global var)
if ((screenWidth > displayWidth) || (screenHeight > displayHeight))
{
- TraceLog(WARNING, "DOWNSCALING: Required screen size (%i x %i) is bigger than display size (%i x %i)", screenWidth, screenHeight, displayWidth, displayHeight);
+ TraceLog(WARNING, "DOWNSCALING: Required screen size (%ix%i) is bigger than display size (%ix%i)", screenWidth, screenHeight, displayWidth, displayHeight);
// Downscaling to fit display with border-bars
float widthRatio = (float)displayWidth/(float)screenWidth;
@@ -1806,6 +2008,7 @@ static void SetupFramebufferSize(int displayWidth, int displayHeight)
// Plays raylib logo appearing animation
static void LogoAnimation(void)
{
+#ifndef PLATFORM_WEB
int logoPositionX = screenWidth/2 - 128;
int logoPositionY = screenHeight/2 - 128;
@@ -1923,6 +2126,8 @@ static void LogoAnimation(void)
EndDrawing();
//----------------------------------------------------------------------------------
}
+#endif
showLogo = false; // Prevent for repeating when reloading window (Android)
}
+
diff --git a/src/makefile b/src/makefile
index bb37748a..501bd0c9 100644
--- a/src/makefile
+++ b/src/makefile
@@ -1,8 +1,6 @@
#**************************************************************************************************
#
-# raylib for Raspberry Pi and Windows desktop
-#
-# makefile for library compilation (raylib.a)
+# raylib makefile for desktop platforms, Raspberry Pi and HTML5 (emscripten)
#
# Copyright (c) 2014 Ramon Santamaria (Ray San - raysan@raysanweb.com)
#
@@ -23,9 +21,9 @@
#
#**************************************************************************************************
-# define raylib platform (by default, compile for RPI)
-# Other possible platforms: PLATFORM_DESKTOP PLATFORM_DESKTOP_LINUX
-PLATFORM ?= PLATFORM_RPI
+# define raylib platform to compile for
+# possible platforms: PLATFORM_DESKTOP PLATFORM_RPI PLATFORM_WEB
+PLATFORM ?= PLATFORM_DESKTOP
# define raylib graphics api depending on selected platform
ifeq ($(PLATFORM),PLATFORM_RPI)
@@ -37,20 +35,31 @@ else
#GRAPHICS = GRAPHICS_API_OPENGL_33 # Uncomment to use OpenGL 3.3
endif
+ifeq ($(PLATFORM),PLATFORM_WEB)
+ GRAPHICS = GRAPHICS_API_OPENGL_ES2
+endif
+
# NOTE: makefiles targets require tab indentation
# define compiler: gcc for C program, define as g++ for C++
-CC = gcc
+ifeq ($(PLATFORM),PLATFORM_WEB)
+ # define emscripten compiler
+ CC = emcc
+else
+ # define default gcc compiler
+ CC = gcc
+endif
# define compiler flags:
# -O2 defines optimization level
# -Wall turns on most, but not all, compiler warnings
# -std=c99 use standard C from 1999 revision
ifeq ($(PLATFORM),PLATFORM_RPI)
- CFLAGS = -O2 -Wall -std=gnu99 -fgnu89-inline
+ CFLAGS = -O1 -Wall -std=gnu99 -fgnu89-inline
else
- CFLAGS = -O2 -Wall -std=c99 -fgnu89-inline
+ CFLAGS = -O1 -Wall -std=c99
endif
+
#CFLAGSEXTRA = -Wextra -Wmissing-prototypes -Wstrict-prototypes
# define any directories containing required header files
@@ -69,15 +78,20 @@ default: raylib
# compile raylib library
raylib: $(OBJS)
+ifeq ($(PLATFORM),PLATFORM_WEB)
+ emcc -O1 $(OBJS) -o libraylib.bc
+else
ar rcs libraylib.a $(OBJS)
+endif
# compile core module
+# emcc core.c -o core.bc -DPLATFORM_DESKTOP
core.o: core.c
- $(CC) -c core.c $(CFLAGS) $(INCLUDES) -D$(PLATFORM)
+ $(CC) -c core.c $(CFLAGS) $(INCLUDES) -D$(PLATFORM) -D$(GRAPHICS)
# compile rlgl module
rlgl.o: rlgl.c
- $(CC) -c rlgl.c $(CFLAGS) $(INCLUDES) -D$(GRAPHICS)
+ $(CC) -c rlgl.c $(CFLAGS) $(INCLUDES) -D$(PLATFORM) -D$(GRAPHICS)
# compile raymath module
raymath.o: raymath.c
@@ -85,19 +99,19 @@ raymath.o: raymath.c
# compile shapes module
shapes.o: shapes.c
- $(CC) -c shapes.c $(CFLAGS) $(INCLUDES) -D$(PLATFORM)
+ $(CC) -c shapes.c $(CFLAGS) $(INCLUDES) -D$(PLATFORM) -D$(GRAPHICS)
# compile textures module
textures.o: textures.c
- $(CC) -c textures.c $(CFLAGS) $(INCLUDES) -D$(PLATFORM)
+ $(CC) -c textures.c $(CFLAGS) $(INCLUDES) -D$(PLATFORM) -D$(GRAPHICS)
# compile text module
text.o: text.c
- $(CC) -c text.c $(CFLAGS) $(INCLUDES) -D$(PLATFORM)
+ $(CC) -c text.c $(CFLAGS) $(INCLUDES) -D$(PLATFORM) -D$(GRAPHICS)
# compile models module
models.o: models.c
- $(CC) -c models.c $(CFLAGS) $(INCLUDES) -D$(PLATFORM)
+ $(CC) -c models.c $(CFLAGS) $(INCLUDES) -D$(PLATFORM) -D$(GRAPHICS)
# compile audio module
audio.o: audio.c
@@ -120,9 +134,13 @@ ifeq ($(PLATFORM),PLATFORM_DESKTOP_LINUX)
find . -type f -executable -delete
rm -f *.o libraylib.a
else
+ifeq ($(PLATFORM),PLATFORM_WEB)
+ del *.o libraylib.bc
+else
del *.o libraylib.a
endif
endif
+endif
@echo Cleaning done
# instead of defining every module one by one, we can define a pattern
diff --git a/src/models.c b/src/models.c
index 2d2af038..e8e4f635 100644
--- a/src/models.c
+++ b/src/models.c
@@ -50,7 +50,7 @@
//----------------------------------------------------------------------------------
// Global Variables Definition
//----------------------------------------------------------------------------------
-// It's lonely here...
+extern unsigned int whiteTexture;
//----------------------------------------------------------------------------------
// Module specific Functions Declaration
@@ -66,14 +66,14 @@ static VertexData LoadOBJ(const char *fileName);
// NOTE: Cube position is the center position
void DrawCube(Vector3 position, float width, float height, float lenght, Color color)
{
- float x = position.x;
- float y = position.y;
- float z = position.z;
+ float x = 0.0f;
+ float y = 0.0f;
+ float z = 0.0f;
rlPushMatrix();
// NOTE: Be careful! Function order matters (rotate -> scale -> translate)
- //rlTranslatef(0.0f, 0.0f, 0.0f);
+ rlTranslatef(position.x, position.y, position.z);
//rlScalef(2.0f, 2.0f, 2.0f);
//rlRotatef(45, 0, 1, 0);
@@ -146,12 +146,13 @@ void DrawCubeV(Vector3 position, Vector3 size, Color color)
// Draw cube wires
void DrawCubeWires(Vector3 position, float width, float height, float lenght, Color color)
{
- float x = position.x;
- float y = position.y;
- float z = position.z;
+ float x = 0.0f;
+ float y = 0.0f;
+ float z = 0.0f;
rlPushMatrix();
+ rlTranslatef(position.x, position.y, position.z);
//rlRotatef(45, 0, 1, 0);
rlBegin(RL_LINES);
@@ -445,11 +446,37 @@ void DrawCylinderWires(Vector3 position, float radiusTop, float radiusBottom, fl
rlPopMatrix();
}
+// Draw a quad
+void DrawQuad(Vector3 vertices[4], Vector2 textcoords[4], Vector3 normals[4], Color colors[4])
+{
+ rlBegin(RL_QUADS);
+ rlColor4ub(colors[0].r, colors[0].g, colors[0].b, colors[0].a);
+ rlNormal3f(normals[0].x, normals[0].y, normals[0].z);
+ rlTexCoord2f(textcoords[0].x, textcoords[0].y);
+ rlVertex3f(vertices[0].x, vertices[0].y, vertices[0].z);
+
+ rlColor4ub(colors[1].r, colors[1].g, colors[1].b, colors[1].a);
+ rlNormal3f(normals[1].x, normals[1].y, normals[1].z);
+ rlTexCoord2f(textcoords[1].x, textcoords[1].y);
+ rlVertex3f(vertices[1].x, vertices[1].y, vertices[1].z);
+
+ rlColor4ub(colors[2].r, colors[2].g, colors[2].b, colors[2].a);
+ rlNormal3f(normals[2].x, normals[2].y, normals[2].z);
+ rlTexCoord2f(textcoords[2].x, textcoords[2].y);
+ rlVertex3f(vertices[2].x, vertices[2].y, vertices[2].z);
+
+ rlColor4ub(colors[3].r, colors[3].g, colors[3].b, colors[3].a);
+ rlNormal3f(normals[3].x, normals[3].y, normals[3].z);
+ rlTexCoord2f(textcoords[3].x, textcoords[3].y);
+ rlVertex3f(vertices[3].x, vertices[3].y, vertices[3].z);
+ rlEnd();
+}
+
// Draw a plane
void DrawPlane(Vector3 centerPos, Vector2 size, Vector3 rotation, Color color)
{
// NOTE: QUADS usage require defining a texture on OpenGL 3.3+
- rlEnableTexture(1); // Default white texture
+ if (rlGetVersion() != OPENGL_11) rlEnableTexture(whiteTexture); // Default white texture
// NOTE: Plane is always created on XZ ground and then rotated
rlPushMatrix();
@@ -471,7 +498,7 @@ void DrawPlane(Vector3 centerPos, Vector2 size, Vector3 rotation, Color color)
rlEnd();
rlPopMatrix();
- rlDisableTexture();
+ if (rlGetVersion() != OPENGL_11) rlDisableTexture();
}
// Draw a plane with divisions
@@ -743,7 +770,7 @@ Model LoadHeightmap(Image heightmap, float maxHeight)
vData.texcoords[tcCounter + 7] = vData.texcoords[tcCounter + 5];
vData.texcoords[tcCounter + 8] = vData.texcoords[tcCounter + 2];
- vData.texcoords[tcCounter + 9] = vData.texcoords[tcCounter + 1];
+ vData.texcoords[tcCounter + 9] = vData.texcoords[tcCounter + 3];
vData.texcoords[tcCounter + 10] = (float)(x+1) / (mapX-1);
vData.texcoords[tcCounter + 11] = (float)(z+1) / (mapZ-1);
@@ -785,7 +812,7 @@ Model LoadHeightmap(Image heightmap, float maxHeight)
}
// Load a map image as a 3d model (cubes based)
-Model LoadCubesmap(Image cubesmap)
+Model LoadCubicmap(Image cubesmap)
{
VertexData vData;
@@ -1041,8 +1068,6 @@ Model LoadCubesmap(Image cubesmap)
// Move data from mapVertices temp arays to vertices float array
vData.vertexCount = vCounter;
- printf("Vertex count: %i\n", vCounter);
-
vData.vertices = (float *)malloc(vData.vertexCount * 3 * sizeof(float));
vData.normals = (float *)malloc(vData.vertexCount * 3 * sizeof(float));
vData.texcoords = (float *)malloc(vData.vertexCount * 2 * sizeof(float));
@@ -1263,6 +1288,12 @@ static VertexData LoadOBJ(const char *fileName)
FILE *objFile;
objFile = fopen(fileName, "rt");
+
+ if (objFile == NULL)
+ {
+ TraceLog(WARNING, "[%s] OBJ file could not be opened", fileName);
+ return vData;
+ }
// First reading pass: Get numVertex, numNormals, numTexCoords, numTriangles
// NOTE: vertex, texcoords and normals could be optimized (to be used indexed on faces definition)
@@ -1490,4 +1521,36 @@ static VertexData LoadOBJ(const char *fileName)
TraceLog(INFO, "[%s] Model loaded successfully in RAM (CPU)", fileName);
return vData;
-} \ No newline at end of file
+}
+
+bool CheckCollisionSpheres(Vector3 centerA, float radiusA, Vector3 centerB, float radiusB)
+{
+
+ return false;
+}
+
+bool CheckCollisionBoxes(Vector3 minBBox1, Vector3 maxBBox1, Vector3 minBBox2, Vector3 maxBBox2)
+{
+ /*
+ // Get min and max vertex to construct bounds (AABB)
+ Vector3 minVertex = tempVertices[0];
+ Vector3 maxVertex = tempVertices[0];
+
+ for (int i = 1; i < tempVertices.Count; i++)
+ {
+ minVertex = Vector3.Min(minVertex, tempVertices[i]);
+ maxVertex = Vector3.Max(maxVertex, tempVertices[i]);
+ }
+
+ bounds = new BoundingBox(minVertex, maxVertex);
+ */
+ return false;
+}
+
+bool CheckCollisionBoxSphere(Vector3 minBBox, Vector3 maxBBox, Vector3 centerSphere, Vector3 radiusSphere)
+{
+
+ return false;
+}
+
+//BoundingBox GetCollisionArea(BoundingBox box1, BoundingBox box2) \ No newline at end of file
diff --git a/src/raylib.h b/src/raylib.h
index 9809d823..69966069 100644
--- a/src/raylib.h
+++ b/src/raylib.h
@@ -14,7 +14,7 @@
* Basic 3d support for Shapes, Models, Heightmaps and Billboards
* Powerful math module for Vector and Matrix operations [raymath]
* Audio loading and playing with streaming support (WAV and OGG)
-* Multiplatform support, including Android devices and Raspberry Pi
+* Multiplatform support, including Android devices, Raspberry Pi and HTML5
*
* Used external libs:
* GLFW3 (www.glfw.org) for window/context management and input
@@ -63,9 +63,10 @@
//#define PLATFORM_DESKTOP // Windows, Linux or OSX
//#define PLATFORM_ANDROID // Android device
//#define PLATFORM_RPI // Raspberry Pi
+//#define PLATFORM_WEB // HTML5 (emscripten, asm.js)
// Security check in case no PLATFORM_* defined
-#if !defined(PLATFORM_DESKTOP) && !defined(PLATFORM_ANDROID) && !defined(PLATFORM_RPI)
+#if !defined(PLATFORM_DESKTOP) && !defined(PLATFORM_ANDROID) && !defined(PLATFORM_RPI) && !defined(PLATFORM_WEB)
#define PLATFORM_DESKTOP
#endif
@@ -275,6 +276,15 @@ typedef struct Sound {
unsigned int buffer;
} Sound;
+// Wave type, defines audio wave data
+typedef struct Wave {
+ void *data; // Buffer data pointer
+ unsigned int dataSize; // Data size in bytes
+ unsigned int sampleRate;
+ short bitsPerSample;
+ short channels;
+} Wave;
+
#ifdef __cplusplus
extern "C" { // Prevents name mangling of functions
#endif
@@ -289,7 +299,7 @@ extern "C" { // Prevents name mangling of functions
//------------------------------------------------------------------------------------
#if defined(PLATFORM_ANDROID)
void InitWindow(int width, int height, struct android_app *state); // Init Android activity
-#elif defined(PLATFORM_DESKTOP) || defined(PLATFORM_RPI)
+#elif defined(PLATFORM_DESKTOP) || defined(PLATFORM_RPI) || defined(PLATFORM_WEB)
void InitWindow(int width, int height, const char *title); // Initialize Window and OpenGL Graphics
#endif
@@ -302,6 +312,7 @@ void SetExitKey(int key); // Set a custom key
#endif
int GetScreenWidth(void); // Get current screen width
int GetScreenHeight(void); // Get current screen height
+int GetKeyPressed(void); // Get latest key pressed
void ClearBackground(Color color); // Sets Background Color
void BeginDrawing(void); // Setup drawing canvas to start drawing
@@ -321,13 +332,12 @@ int GetRandomValue(int min, int max); // Returns a random
Color Fade(Color color, float alpha); // Color fade-in or fade-out, alpha goes from 0.0f to 1.0f
void SetupFlags(char flags); // Enable some window configurations
-
-void ShowLogo(void); // Activates raylib logo at startup
+void ShowLogo(void); // Activates raylib logo at startup (can be done with flags)
//------------------------------------------------------------------------------------
// Input Handling Functions (Module: core)
//------------------------------------------------------------------------------------
-#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_RPI)
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_RPI) || defined(PLATFORM_WEB)
bool IsKeyPressed(int key); // Detect if a key has been pressed once
bool IsKeyDown(int key); // Detect if a key is being pressed
bool IsKeyReleased(int key); // Detect if a key has been released once
@@ -340,8 +350,11 @@ bool IsMouseButtonUp(int button); // Detect if a mouse but
int GetMouseX(void); // Returns mouse position X
int GetMouseY(void); // Returns mouse position Y
Vector2 GetMousePosition(void); // Returns mouse position XY
+void SetMousePosition(Vector2 position); // Set mouse position XY
int GetMouseWheelMove(void); // Returns mouse wheel movement Y
+#endif
+#if defined(PLATFORM_DESKTOP)
bool IsGamepadAvailable(int gamepad); // Detect if a gamepad is available
Vector2 GetGamepadMovement(int gamepad); // Return axis movement vector for a gamepad
bool IsGamepadButtonPressed(int gamepad, int button); // Detect if a gamepad button has been pressed once
@@ -394,9 +407,11 @@ Image LoadImage(const char *fileName);
Image LoadImageFromRES(const char *rresName, int resId); // Load an image from rRES file (raylib Resource)
Texture2D LoadTexture(const char *fileName); // Load an image as texture into GPU memory
Texture2D LoadTextureFromRES(const char *rresName, int resId); // Load an image as texture from rRES file (raylib Resource)
-Texture2D CreateTexture(Image image, bool genMipmaps); // Create a Texture2D from Image data (and generate mipmaps)
+Texture2D LoadTextureFromImage(Image image, bool genMipmaps); // Load a texture from image data (and generate mipmaps)
+Texture2D CreateTexture(Image image, bool genMipmaps); // [DEPRECATED] Same as LoadTextureFromImage()
void UnloadImage(Image image); // Unload image from CPU memory (RAM)
void UnloadTexture(Texture2D texture); // Unload texture from GPU memory
+void ConvertToPOT(Image *image, Color fillColor); // Convert image to POT (power-of-two)
void DrawTexture(Texture2D texture, int posX, int posY, Color tint); // Draw a Texture2D
void DrawTextureV(Texture2D texture, Vector2 position, Color tint); // Draw a Texture2D with position defined as Vector2
@@ -433,6 +448,7 @@ void DrawSphereEx(Vector3 centerPos, float radius, int rings, int slices, Color
void DrawSphereWires(Vector3 centerPos, float radius, int rings, int slices, Color color); // Draw sphere wires
void DrawCylinder(Vector3 position, float radiusTop, float radiusBottom, float height, int slices, Color color); // Draw a cylinder/cone
void DrawCylinderWires(Vector3 position, float radiusTop, float radiusBottom, float height, int slices, Color color); // Draw a cylinder/cone wires
+void DrawQuad(Vector3 vertices[4], Vector2 textcoords[4], Vector3 normals[4], Color colors[4]); // Draw a quad
void DrawPlane(Vector3 centerPos, Vector2 size, Vector3 rotation, Color color); // Draw a plane
void DrawPlaneEx(Vector3 centerPos, Vector2 size, Vector3 rotation, int slicesX, int slicesZ, Color color); // Draw a plane with divisions
void DrawGrid(int slices, float spacing); // Draw a grid (centered at (0, 0, 0))
@@ -446,7 +462,7 @@ void DrawGizmoEx(Vector3 position, Vector3 rotation, float scale);
Model LoadModel(const char *fileName); // Load a 3d model (.OBJ)
//Model LoadModelFromRES(const char *rresName, int resId); // TODO: Load a 3d model from rRES file (raylib Resource)
Model LoadHeightmap(Image heightmap, float maxHeight); // Load a heightmap image as a 3d model
-Model LoadCubesmap(Image cubesmap); // Load a map image as a 3d model (cubes based)
+Model LoadCubicmap(Image cubicmap); // Load a map image as a 3d model (cubes based)
void UnloadModel(Model model); // Unload 3d model from memory
void SetModelTexture(Model *model, Texture2D texture); // Link a texture to a model
@@ -464,6 +480,7 @@ void InitAudioDevice(void); // Initialize au
void CloseAudioDevice(void); // Close the audio device and context (and music stream)
Sound LoadSound(char *fileName); // Load sound to memory
+Sound LoadSoundFromWave(Wave wave); // Load sound to memory from wave data
Sound LoadSoundFromRES(const char *rresName, int resId); // Load sound to memory from rRES file (raylib Resource)
void UnloadSound(Sound sound); // Unload sound
void PlaySound(Sound sound); // Play a sound
diff --git a/src/raymath.c b/src/raymath.c
index e598b381..ed45ee92 100644
--- a/src/raymath.c
+++ b/src/raymath.c
@@ -329,8 +329,6 @@ void MatrixInvert(Matrix *mat)
// Calculate the invert determinant (inlined to avoid double-caching)
float invDet = 1/(b00*b11 - b01*b10 + b02*b09 + b03*b08 - b04*b07 + b05*b06);
- printf("%f\n", invDet);
-
temp.m0 = (a11*b11 - a12*b10 + a13*b09)*invDet;
temp.m1 = (-a01*b11 + a02*b10 - a03*b09)*invDet;
temp.m2 = (a31*b05 - a32*b04 + a33*b03)*invDet;
@@ -492,6 +490,48 @@ Matrix MatrixRotate(float angleX, float angleY, float angleZ)
return result;
}
+/*
+Matrix MatrixRotate(float angle, float x, float y, float z)
+{
+ Matrix result = MatrixIdentity();
+
+ float c = cosf(angle*DEG2RAD); // cosine
+ float s = sinf(angle*DEG2RAD); // sine
+ float c1 = 1.0f - c; // 1 - c
+
+ float m0 = result.m0, m4 = result.m4, m8 = result.m8, m12 = result.m12,
+ m1 = result.m1, m5 = result.m5, m9 = result.m9, m13 = result.m13,
+ m2 = result.m2, m6 = result.m6, m10 = result.m10, m14 = result.m14;
+
+ // build rotation matrix
+ float r0 = x * x * c1 + c;
+ float r1 = x * y * c1 + z * s;
+ float r2 = x * z * c1 - y * s;
+ float r4 = x * y * c1 - z * s;
+ float r5 = y * y * c1 + c;
+ float r6 = y * z * c1 + x * s;
+ float r8 = x * z * c1 + y * s;
+ float r9 = y * z * c1 - x * s;
+ float r10= z * z * c1 + c;
+
+ // multiply rotation matrix
+ result.m0 = r0*m0 + r4*m1 + r8*m2;
+ result.m1 = r1*m0 + r5*m1 + r9*m2;
+ result.m2 = r2*m0 + r6*m1 + r10*m2;
+ result.m4 = r0*m4 + r4*m5 + r8*m6;
+ result.m5 = r1*m4 + r5*m5 + r9*m6;
+ result.m6 = r2*m4 + r6*m5 + r10*m6;
+ result.m8 = r0*m8 + r4*m9 + r8*m10;
+ result.m9 = r1*m8 + r5*m9 + r9*m10;
+ result.m10 = r2*m8 + r6*m9 + r10*m10;
+ result.m12 = r0*m12+ r4*m13 + r8*m14;
+ result.m13 = r1*m12+ r5*m13 + r9*m14;
+ result.m14 = r2*m12+ r6*m13 + r10*m14;
+
+ return result;
+}
+*/
+
// Create rotation matrix from axis and angle
// TODO: Test this function
// NOTE: NO prototype defined!
@@ -559,8 +599,8 @@ Matrix MatrixFromAxisAngle2(Vector3 axis, float angle)
float axisX = axis.x, axisY = axis.y, axisZ = axis.y;
// Calculate angles
- float cosres = (float)cos(-angle);
- float sinres = (float)sin(-angle);
+ float cosres = (float)cos(angle);
+ float sinres = (float)sin(angle);
float t = 1.0f - cosres;
// Do the conversion math once
@@ -668,6 +708,7 @@ Matrix MatrixScale(float x, float y, float z)
// Returns transformation matrix for a given translation, rotation and scale
// NOTE: Transformation order is rotation -> scale -> translation
+// NOTE: Rotation angles should come in radians
Matrix MatrixTransform(Vector3 translation, Vector3 rotation, Vector3 scale)
{
Matrix result = MatrixIdentity();
diff --git a/src/raymath.h b/src/raymath.h
index c396a347..c8c1a26c 100644
--- a/src/raymath.h
+++ b/src/raymath.h
@@ -55,7 +55,7 @@
} Vector3;
#endif
-// Matrix type (OpenGL style 4x4 - right handed)
+// Matrix type (OpenGL style 4x4 - right handed, column major)
typedef struct Matrix {
float m0, m4, m8, m12;
float m1, m5, m9, m13;
@@ -107,9 +107,9 @@ Matrix MatrixIdentity(void); // Returns identity matr
Matrix MatrixAdd(Matrix left, Matrix right); // Add two matrices
Matrix MatrixSubstract(Matrix left, Matrix right); // Substract two matrices (left - right)
Matrix MatrixTranslate(float x, float y, float z); // Returns translation matrix
-Matrix MatrixRotate(float angleX, float angleY, float angleZ); // Returns rotation matrix
-Matrix MatrixRotateAroundAxis(Vector3 axis, float angle); // Returns rotation matrix for an angle around an specified axis
-Matrix MatrixRotateAroundAxis2(Vector3 axis, float angle); // Returns rotation matrix for an angle around an specified axis (test another implemntation)
+Matrix MatrixRotate(float axisX, float axisY, float axisZ); // Returns rotation matrix
+Matrix MatrixFromAxisAngle(Vector3 axis, float angle); // Returns rotation matrix for an angle around an specified axis
+Matrix MatrixFromAxisAngle2(Vector3 axis, float angle); // Returns rotation matrix for an angle around an specified axis (test another implemntation)
Matrix MatrixFromQuaternion(Quaternion q); // Returns rotation matrix for a given quaternion
Matrix MatrixRotateX(float angle); // Returns x-rotation matrix (angle in radians)
Matrix MatrixRotateY(float angle); // Returns y-rotation matrix (angle in radians)
diff --git a/src/rlgl.c b/src/rlgl.c
index 5e51b2e2..6f2027c7 100644
--- a/src/rlgl.c
+++ b/src/rlgl.c
@@ -168,21 +168,23 @@ static Vector3 *tempBuffer;
static int tempBufferCount = 0;
static bool useTempBuffer = false;
-// White texture useful for plain color polys (required by shader)
-static GLuint whiteTexture;
-
// Support for VAOs (OpenGL ES2 could not support VAO extensions)
static bool vaoSupported = false;
#endif
#if defined(GRAPHICS_API_OPENGL_ES2)
// NOTE: VAO functionality is exposed through extensions (OES)
+// emscripten does not support VAOs
static PFNGLGENVERTEXARRAYSOESPROC glGenVertexArrays;
static PFNGLBINDVERTEXARRAYOESPROC glBindVertexArray;
static PFNGLDELETEVERTEXARRAYSOESPROC glDeleteVertexArrays;
static PFNGLISVERTEXARRAYOESPROC glIsVertexArray;
#endif
+// White texture useful for plain color polys (required by shader)
+// NOTE: It's required in shapes and models modules!
+unsigned int whiteTexture;
+
//----------------------------------------------------------------------------------
// Module specific Functions Declaration
//----------------------------------------------------------------------------------
@@ -298,10 +300,21 @@ void rlRotatef(float angleDeg, float x, float y, float z)
// TODO: Support rotation in multiple axes
Matrix rot = MatrixIdentity();
+ // OPTION 1: It works...
if (x == 1) rot = MatrixRotateX(angleDeg*DEG2RAD);
else if (y == 1) rot = MatrixRotateY(angleDeg*DEG2RAD);
else if (z == 1) rot = MatrixRotateZ(angleDeg*DEG2RAD);
-
+
+ // OPTION 2: Requires review...
+ //Vector3 vec = (Vector3){ 0, 1, 0 };
+ //VectorNormalize(&vec);
+ //rot = MatrixFromAxisAngle(vec, angleDeg*DEG2RAD); // Working?
+
+ // OPTION 3: TODO: Review, it doesn't work!
+ //Vector3 vec = (Vector3){ x, y, z };
+ //VectorNormalize(&vec);
+ //rot = MatrixRotate(angleDeg*vec.x, angleDeg*vec.x, angleDeg*vec.x);
+
MatrixTranspose(&rot);
*currentMatrix = MatrixMultiply(*currentMatrix, rot);
@@ -760,10 +773,13 @@ void rlglInit(void)
#endif
#if defined(GRAPHICS_API_OPENGL_ES2)
+ // NOTE: emscripten does not support VAOs natively, it uses emulation and it reduces overall performance...
+#if !defined(PLATFORM_WEB)
glGenVertexArrays = (PFNGLGENVERTEXARRAYSOESPROC)eglGetProcAddress("glGenVertexArraysOES");
glBindVertexArray = (PFNGLBINDVERTEXARRAYOESPROC)eglGetProcAddress("glBindVertexArrayOES");
glDeleteVertexArrays = (PFNGLDELETEVERTEXARRAYSOESPROC)eglGetProcAddress("glDeleteVertexArraysOES");
glIsVertexArray = (PFNGLISVERTEXARRAYOESPROC)eglGetProcAddress("glIsVertexArrayOES");
+#endif
if (glGenVertexArrays == NULL) TraceLog(WARNING, "Could not initialize VAO extensions, VAOs not supported");
else
@@ -848,8 +864,8 @@ void rlglInit(void)
whiteTexture = rlglLoadTexture(pixels, 1, 1, false);
- if (whiteTexture != 0) TraceLog(INFO, "[TEX ID %i] Base white texture created successfully", whiteTexture);
- else TraceLog(WARNING, "Base white texture could not be created");
+ if (whiteTexture != 0) TraceLog(INFO, "[TEX ID %i] Base white texture loaded successfully", whiteTexture);
+ else TraceLog(WARNING, "Base white texture could not be loaded");
// Init draw calls tracking system
draws = (DrawCall *)malloc(sizeof(DrawCall)*MAX_DRAWS_BY_TEXTURE);
@@ -1113,6 +1129,8 @@ void rlglDrawModel(Model model, Vector3 position, Vector3 rotation, Vector3 scal
#if defined(GRAPHICS_API_OPENGL_33) || defined(GRAPHICS_API_OPENGL_ES2)
glUseProgram(shaderProgram); // Use our shader
+ VectorScale(&rotation, DEG2RAD);
+
// Get transform matrix (rotation -> scale -> translation)
Matrix transform = MatrixTransform(position, rotation, scale);
Matrix modelviewworld = MatrixMultiply(transform, modelview);
@@ -1305,7 +1323,8 @@ unsigned int rlglLoadTexture(unsigned char *data, int width, int height, bool ge
#if defined(GRAPHICS_API_OPENGL_33)
// NOTE: We define internal (GPU) format as GL_RGBA8 (probably BGRA8 in practice, driver takes care)
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, data);
+ //glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, data); // OpenGL
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, data); // WebGL
#elif defined(GRAPHICS_API_OPENGL_ES2)
// NOTE: On embedded systems, we let the driver choose the best internal format
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, data);
@@ -1324,7 +1343,7 @@ unsigned int rlglLoadTexture(unsigned char *data, int width, int height, bool ge
// Unbind current texture
glBindTexture(GL_TEXTURE_2D, 0);
- TraceLog(INFO, "[TEX ID %i] Texture created successfully (%i x %i)", id, width, height);
+ TraceLog(INFO, "[TEX ID %i] Texture created successfully (%ix%i)", id, width, height);
return id;
}
@@ -1347,7 +1366,7 @@ Model rlglLoadModel(VertexData mesh)
#elif defined(GRAPHICS_API_OPENGL_33) || defined(GRAPHICS_API_OPENGL_ES2)
model.textureId = 1; // Default whiteTexture
- GLuint vaoModel; // Vertex Array Objects (VAO)
+ GLuint vaoModel = 0; // Vertex Array Objects (VAO)
GLuint vertexBuffer[3]; // Vertex Buffer Objects (VBO)
if (vaoSupported)
@@ -1562,6 +1581,7 @@ static GLuint LoadDefaultShaders(void)
char fShaderStr[] = " #version 110 \n" // NOTE: Equivalent to version 100 on ES2
#elif defined(GRAPHICS_API_OPENGL_ES2)
char fShaderStr[] = " #version 100 \n" // NOTE: Must be defined this way! 110 doesn't work!
+ "precision mediump float; \n" // WebGL, required for emscripten
#endif
"uniform sampler2D texture0; \n"
"varying vec2 fragTexCoord; \n"
@@ -1673,33 +1693,35 @@ static GLuint LoadShaders(char *vertexFileName, char *fragmentFileName)
}
// Read shader text file
-static char *TextFileRead(char *fn)
+static char *TextFileRead(char *fileName)
{
- FILE *fp;
+ FILE *textFile;
char *text = NULL;
int count=0;
- if (fn != NULL)
+ if (fileName != NULL)
{
- fp = fopen(fn,"rt");
+ textFile = fopen(fileName,"rt");
- if (fp != NULL)
+ if (textFile != NULL)
{
- fseek(fp, 0, SEEK_END);
- count = ftell(fp);
- rewind(fp);
+ fseek(textFile, 0, SEEK_END);
+ count = ftell(textFile);
+ rewind(textFile);
if (count > 0)
{
text = (char *)malloc(sizeof(char) * (count+1));
- count = fread(text, sizeof(char), count, fp);
+ count = fread(text, sizeof(char), count, textFile);
text[count] = '\0';
}
- fclose(fp);
+ fclose(textFile);
}
+ else TraceLog(WARNING, "[%s] Text file could not be opened", fileName);
}
+
return text;
}
@@ -1798,7 +1820,7 @@ static void InitializeBuffersGPU(void)
glGenVertexArrays(1, &vaoTriangles);
glBindVertexArray(vaoTriangles);
}
-
+
// Create buffers for our vertex data
glGenBuffers(2, trianglesBuffer);
@@ -1823,7 +1845,7 @@ static void InitializeBuffersGPU(void)
glGenVertexArrays(1, &vaoQuads);
glBindVertexArray(vaoQuads);
}
-
+
// Create buffers for our vertex data
glGenBuffers(4, quadsBuffer);
@@ -1859,6 +1881,8 @@ static void InitializeBuffersGPU(void)
}
// Update VBOs with vertex array data
+// TODO: If there is not vertex data, buffers doesn't need to be updated (vertexCount > 0)
+// TODO: If no data changed on the CPU arrays --> No need to update GPU arrays every frame!
static void UpdateBuffers(void)
{
// Activate Lines VAO
@@ -1973,7 +1997,7 @@ static int GenerateMipmaps(unsigned char *data, int baseWidth, int baseHeight)
j++;
}
- TraceLog(DEBUG, "Mipmap base (%i, %i)", width, height);
+ TraceLog(DEBUG, "Mipmap base (%ix%i)", width, height);
for (int mip = 1; mip < mipmapCount; mip++)
{
@@ -2044,7 +2068,7 @@ static pixel *GenNextMipmap(pixel *srcData, int srcWidth, int srcHeight)
}
}
- TraceLog(DEBUG, "Mipmap generated successfully (%i, %i)", width, height);
+ TraceLog(DEBUG, "Mipmap generated successfully (%ix%i)", width, height);
return mipmap;
}
diff --git a/src/rlgl.h b/src/rlgl.h
index 2e1d18f4..5fa9c486 100644
--- a/src/rlgl.h
+++ b/src/rlgl.h
@@ -45,7 +45,7 @@
// Choose opengl version here or just define it at compile time: -DGRAPHICS_API_OPENGL_33
//#define GRAPHICS_API_OPENGL_11 // Only available on PLATFORM_DESKTOP
//#define GRAPHICS_API_OPENGL_33 // Only available on PLATFORM_DESKTOP
-//#define GRAPHICS_API_OPENGL_ES2 // Only available on PLATFORM_ANDROID or PLATFORM_RPI
+//#define GRAPHICS_API_OPENGL_ES2 // Only available on PLATFORM_ANDROID or PLATFORM_RPI or PLATFORM_WEB
// Security check in case no GRAPHICS_API_OPENGL_* defined
#if !defined(GRAPHICS_API_OPENGL_11) && !defined(GRAPHICS_API_OPENGL_33) && !defined(GRAPHICS_API_OPENGL_ES2)
@@ -72,8 +72,9 @@
#define MAX_TRIANGLES_BATCH 4096
#define MAX_QUADS_BATCH 4096
#elif defined(GRAPHICS_API_OPENGL_ES2)
- // NOTE: Reduce memory sizes for embedded systems (RPI)
- #define MAX_LINES_BATCH 2048 // Critical for wire shapes (sphere)
+ // NOTE: Reduce memory sizes for embedded systems (RPI and HTML5)
+ // NOTE: On HTML5 (emscripten) this is allocated on heap, by default it's only 16MB!...just take care...
+ #define MAX_LINES_BATCH 1024 // Critical for wire shapes (sphere)
#define MAX_TRIANGLES_BATCH 2048 // Critical for some shapes (sphere)
#define MAX_QUADS_BATCH 1024 // Be careful with text, every letter maps a quad
#endif
diff --git a/src/shapes.c b/src/shapes.c
index 6fa26bee..d872eacf 100644
--- a/src/shapes.c
+++ b/src/shapes.c
@@ -44,7 +44,7 @@
//----------------------------------------------------------------------------------
// Global Variables Definition
//----------------------------------------------------------------------------------
-// It's lonely here...
+extern unsigned int whiteTexture;
//----------------------------------------------------------------------------------
// Module specific Functions Declaration
@@ -197,7 +197,7 @@ void DrawRectangleV(Vector2 position, Vector2 size, Color color)
else if ((rlGetVersion() == OPENGL_33) || (rlGetVersion() == OPENGL_ES_20))
{
// NOTE: This shape uses QUADS to avoid drawing order issues (view rlglDraw)
- rlEnableTexture(1); // Default white texture
+ rlEnableTexture(whiteTexture); // Default white texture
rlBegin(RL_QUADS);
rlColor4ub(color.r, color.g, color.b, color.a);
diff --git a/src/stb_image.h b/src/stb_image.h
index 461ef087..39cbb7ad 100644
--- a/src/stb_image.h
+++ b/src/stb_image.h
@@ -1,19 +1,27 @@
-/* stb_image - v1.46 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c
- when you control the images you're loading
+/* stb_image - v2.00b - public domain image loader - http://nothings.org/stb_image.h
no warranty implied; use at your own risk
Do this:
#define STB_IMAGE_IMPLEMENTATION
before you include this file in *one* C or C++ file to create the implementation.
- #define STBI_ASSERT(x) to avoid using assert.h.
+ // i.e. it should look like this:
+ #include ...
+ #include ...
+ #include ...
+ #define STB_IMAGE_IMPLEMENTATION
+ #include "stb_image.h"
+
+ You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
+ And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
+
QUICK NOTES:
Primarily of interest to game developers and other people who can
avoid problematic images and only need the trivial interface
- JPEG baseline (no JPEG progressive)
- PNG 8-bit-per-channel only
+ JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
+ PNG 1/2/4/8-bit-per-channel (16 bpc not supported)
TGA (not sure what subset, if a subset)
BMP non-1bpp, non-RLE
@@ -22,33 +30,138 @@
GIF (*comp always reports as 4-channel)
HDR (radiance rgbE format)
PIC (Softimage PIC)
+ PNM (PPM and PGM binary only)
- decode from memory or through FILE (define STBI_NO_STDIO to remove code)
- decode from arbitrary I/O callbacks
- - overridable dequantizing-IDCT, YCbCr-to-RGB conversion (define STBI_SIMD)
-
- Latest revisions:
- 1.46 (2014-08-26) fix broken tRNS chunk in non-paletted PNG
- 1.45 (2014-08-16) workaround MSVC-ARM internal compiler error by wrapping malloc
- 1.44 (2014-08-07) warnings
- 1.43 (2014-07-15) fix MSVC-only bug in 1.42
- 1.42 (2014-07-09) no _CRT_SECURE_NO_WARNINGS; error-path fixes; STBI_ASSERT
- 1.41 (2014-06-25) fix search&replace that messed up comments/error messages
- 1.40 (2014-06-22) gcc warning
- 1.39 (2014-06-15) TGA optimization bugfix, multiple BMP fixes
- 1.38 (2014-06-06) suppress MSVC run-time warnings, fix accidental rename of 'skip'
- 1.37 (2014-06-04) remove duplicate typedef
- 1.36 (2014-06-03) converted to header file, allow reading incorrect iphoned-images without iphone flag
- 1.35 (2014-05-27) warnings, bugfixes, TGA optimization, etc
+ - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
+
+ Full documentation under "DOCUMENTATION" below.
+
+
+ Revision 2.00 release notes:
+
+ - Progressive JPEG is now supported.
+
+ - PPM and PGM binary formats are now supported, thanks to Ken Miller.
+
+ - x86 platforms now make use of SSE2 SIMD instructions for
+ JPEG decoding, and ARM platforms can use NEON SIMD if requested.
+ This work was done by Fabian "ryg" Giesen. SSE2 is used by
+ default, but NEON must be enabled explicitly; see docs.
+
+ With other JPEG optimizations included in this version, we see
+ 2x speedup on a JPEG on an x86 machine, and a 1.5x speedup
+ on a JPEG on an ARM machine, relative to previous versions of this
+ library. The same results will not obtain for all JPGs and for all
+ x86/ARM machines. (Note that progressive JPEGs are significantly
+ slower to decode than regular JPEGs.) This doesn't mean that this
+ is the fastest JPEG decoder in the land; rather, it brings it
+ closer to parity with standard libraries. If you want the fastest
+ decode, look elsewhere. (See "Philosophy" section of docs below.)
+
+ See final bullet items below for more info on SIMD.
+
+ - Added STBI_MALLOC, STBI_REALLOC, and STBI_FREE macros for replacing
+ the memory allocator. Unlike other STBI libraries, these macros don't
+ support a context parameter, so if you need to pass a context in to
+ the allocator, you'll have to store it in a global or a thread-local
+ variable.
+
+ - Split existing STBI_NO_HDR flag into two flags, STBI_NO_HDR and
+ STBI_NO_LINEAR.
+ STBI_NO_HDR: suppress implementation of .hdr reader format
+ STBI_NO_LINEAR: suppress high-dynamic-range light-linear float API
+
+ - You can suppress implementation of any of the decoders to reduce
+ your code footprint by #defining one or more of the following
+ symbols before creating the implementation.
+
+ STBI_NO_JPEG
+ STBI_NO_PNG
+ STBI_NO_BMP
+ STBI_NO_PSD
+ STBI_NO_TGA
+ STBI_NO_GIF
+ STBI_NO_HDR
+ STBI_NO_PIC
+ STBI_NO_PNM (.ppm and .pgm)
+
+ - You can request *only* certain decoders and suppress all other ones
+ (this will be more forward-compatible, as addition of new decoders
+ doesn't require you to disable them explicitly):
+
+ STBI_ONLY_JPEG
+ STBI_ONLY_PNG
+ STBI_ONLY_BMP
+ STBI_ONLY_PSD
+ STBI_ONLY_TGA
+ STBI_ONLY_GIF
+ STBI_ONLY_HDR
+ STBI_ONLY_PIC
+ STBI_ONLY_PNM (.ppm and .pgm)
+
+ Note that you can define multiples of these, and you will get all
+ of them ("only x" and "only y" is interpreted to mean "only x&y").
+
+ - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
+ want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
+
+ - Compilation of all SIMD code can be suppressed with
+ #define STBI_NO_SIMD
+ It should not be necessary to disable SIMD unless you have issues
+ compiling (e.g. using an x86 compiler which doesn't support SSE
+ intrinsics or that doesn't support the method used to detect
+ SSE2 support at run-time), and even those can be reported as
+ bugs so I can refine the built-in compile-time checking to be
+ smarter.
+
+ - The old STBI_SIMD system which allowed installing a user-defined
+ IDCT etc. has been removed. If you need this, don't upgrade. My
+ assumption is that almost nobody was doing this, and those who
+ were will find the built-in SIMD more satisfactory anyway.
+
+ - RGB values computed for JPEG images are slightly different from
+ previous versions of stb_image. (This is due to using less
+ integer precision in SIMD.) The C code has been adjusted so
+ that the same RGB values will be computed regardless of whether
+ SIMD support is available, so your app should always produce
+ consistent results. But these results are slightly different from
+ previous versions. (Specifically, about 3% of available YCbCr values
+ will compute different RGB results from pre-1.49 versions by +-1;
+ most of the deviating values are one smaller in the G channel.)
+
+ - If you must produce consistent results with previous versions of
+ stb_image, #define STBI_JPEG_OLD and you will get the same results
+ you used to; however, you will not get the SIMD speedups for
+ the YCbCr-to-RGB conversion step (although you should still see
+ significant JPEG speedup from the other changes).
+
+ Please note that STBI_JPEG_OLD is a temporary feature; it will be
+ removed in future versions of the library. It is only intended for
+ near-term back-compatibility use.
+
+
+ Latest revision history:
+ 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
+ 2.00 (2014-12-25) optimize JPEG, including x86 SSE2 & ARM NEON SIMD
+ progressive JPEG
+ PGM/PPM support
+ STBI_MALLOC,STBI_REALLOC,STBI_FREE
+ STBI_NO_*, STBI_ONLY_*
+ GIF bugfix
+ 1.48 (2014-12-14) fix incorrectly-named assert()
+ 1.47 (2014-12-14) 1/2/4-bit PNG support (both grayscale and paletted)
+ optimize PNG
+ fix bug in interlaced PNG with user-specified channel count
+ 1.46 (2014-08-26) fix broken tRNS chunk in non-paletted PNG
+ 1.45 (2014-08-16) workaround MSVC-ARM internal compiler error by wrapping malloc
See end of file for full revision history.
- TODO:
- stbi_info support for BMP,PSD,HDR,PIC
-
============================ Contributors =========================
-
+
Image formats Bug fixes & warning fixes
Sean Barrett (jpeg, png, bmp) Marc LeBlanc
Nicolas Schulz (hdr, psd) Christpher Lloyd
@@ -56,42 +169,52 @@
Jean-Marc Lienher (gif) Won Chun
Tom Seddon (pic) the Horde3D community
Thatcher Ulrich (psd) Janez Zemva
- Jonathan Blow
+ Ken Miller (pgm, ppm) Jonathan Blow
Laurent Gomila
- Extensions, features Aruelien Pocheville
- Jetro Lauha (stbi_info) Ryamond Barbiero
- James "moose2000" Brown (iPhone PNG) David Woo
- Ben "Disch" Wenger (io callbacks) Roy Eltham
- Martin "SpartanJ" Golini Luke Graham
- Thomas Ruf
+ Aruelien Pocheville
+ Extensions, features Ryamond Barbiero
+ Jetro Lauha (stbi_info) David Woo
+ Martin "SpartanJ" Golini (stbi_info) Martin Golini
+ James "moose2000" Brown (iPhone PNG) Roy Eltham
+ Ben "Disch" Wenger (io callbacks) Luke Graham
+ Omar Cornut (1/2/4-bit PNG) Thomas Ruf
John Bartholomew
- Optimizations & bugfixes Ken Hamada
- Fabian "ryg" Giesen Cort Stratton
- Arseny Kapoulkine Blazej Dariusz Roszkowski
- Thibault Reuille
+ Ken Hamada
+ Optimizations & bugfixes Cort Stratton
+ Fabian "ryg" Giesen Blazej Dariusz Roszkowski
+ Arseny Kapoulkine Thibault Reuille
Paul Du Bois
Guillaume George
- Jerry Jansson
- If your name should be here but Hayaki Saito
- isn't, let Sean know. Johan Duparc
+ If your name should be here but Jerry Jansson
+ isn't, let Sean know. Hayaki Saito
+ Johan Duparc
Ronny Chevalier
Michal Cichon
+ Tero Hanninen
+
+License:
+ This software is in the public domain. Where that dedication is not
+ recognized, you are granted a perpetual, irrevocable license to copy
+ and modify this file however you want.
+
*/
#ifndef STBI_INCLUDE_STB_IMAGE_H
#define STBI_INCLUDE_STB_IMAGE_H
+// DOCUMENTATION
+//
// Limitations:
-// - no jpeg progressive support
-// - non-HDR formats support 8-bit samples only (jpeg, png)
-// - no delayed line count (jpeg) -- IJG doesn't support either
+// - no 16-bit-per-channel PNG
+// - no 12-bit-per-channel JPEG
+// - no JPEGs with arithmetic coding
// - no 1-bit BMP
// - GIF always returns *comp=4
//
-// Basic usage (see HDR discussion below):
+// Basic usage (see HDR discussion below for HDR usage):
// int x,y,n;
// unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
-// // ... process data if not NULL ...
+// // ... process data if not NULL ...
// // ... x = width, y = height, n = # 8-bit components per pixel ...
// // ... replace '0' with '1'..'4' to force that many components per pixel
// // ... but 'n' will always be the number that it would have been if you said 0
@@ -104,14 +227,16 @@
// int req_comp -- if non-zero, # of image components requested in result
//
// The return value from an image loader is an 'unsigned char *' which points
-// to the pixel data. The pixel data consists of *y scanlines of *x pixels,
+// to the pixel data, or NULL on an allocation failure or if the image is
+// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
// with each pixel consisting of N interleaved 8-bit components; the first
// pixel pointed to is top-left-most in the image. There is no padding between
// image scanlines or between pixels, regardless of format. The number of
// components N is 'req_comp' if req_comp is non-zero, or *comp otherwise.
// If req_comp is non-zero, *comp has the number of components that _would_
// have been output otherwise. E.g. if you set req_comp to 4, you will always
-// get RGBA output, but you can check *comp to easily see if it's opaque.
+// get RGBA output, but you can check *comp to see if it's trivially opaque
+// because e.g. there were only 3 channels in the source image.
//
// An output image with N components has the following components interleaved
// in this order in each pixel:
@@ -133,18 +258,66 @@
//
// ===========================================================================
//
-// iPhone PNG support:
+// Philosophy
//
-// By default we convert iphone-formatted PNGs back to RGB; nominally they
-// would silently load as BGR, except the existing code should have just
-// failed on such iPhone PNGs. But you can disable this conversion by
-// by calling stbi_convert_iphone_png_to_rgb(0), in which case
-// you will always just get the native iphone "format" through.
+// stb libraries are designed with the following priorities:
//
-// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
-// pixel to remove any premultiplied alpha *only* if the image file explicitly
-// says there's premultiplied data (currently only happens in iPhone images,
-// and only if iPhone convert-to-rgb processing is on).
+// 1. easy to use
+// 2. easy to maintain
+// 3. good performance
+//
+// Sometimes I let "good performance" creep up in priority over "easy to maintain",
+// and for best performance I may provide less-easy-to-use APIs that give higher
+// performance, in addition to the easy to use ones. Nevertheless, it's important
+// to keep in mind that from the standpoint of you, a client of this library,
+// all you care about is #1 and #3, and stb libraries do not emphasize #3 above all.
+//
+// Some secondary priorities arise directly from the first two, some of which
+// make more explicit reasons why performance can't be emphasized.
+//
+// - Portable ("ease of use")
+// - Small footprint ("easy to maintain")
+// - No dependencies ("ease of use")
+//
+// ===========================================================================
+//
+// I/O callbacks
+//
+// I/O callbacks allow you to read from arbitrary sources, like packaged
+// files or some other source. Data read from callbacks are processed
+// through a small internal buffer (currently 128 bytes) to try to reduce
+// overhead.
+//
+// The three functions you must define are "read" (reads some bytes of data),
+// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
+//
+// ===========================================================================
+//
+// SIMD support
+//
+// The JPEG decoder will try to automatically use SIMD kernels on x86 when
+// supported by the compiler. For ARM Neon support, you must explicitly
+// request it.
+//
+// (The old do-it-yourself SIMD API is no longer supported in the current
+// code.)
+//
+// On x86, SSE2 will automatically be used when available based on a run-time
+// test; if not, the generic C versions are used as a fall-back. On ARM targets,
+// the typical path is to have separate builds for NEON and non-NEON devices
+// (at least this is true for iOS and Android). Therefore, the NEON support is
+// toggled by a build flag: define STBI_NEON to get NEON loops.
+//
+// The output of the JPEG decoder is slightly different from versions where
+// SIMD support was introduced (that is, for versions before 1.49). The
+// difference is only +-1 in the 8-bit RGB channels, and only on a small
+// fraction of pixels. You can force the pre-1.49 behavior by defining
+// STBI_JPEG_OLD, but this will disable some of the SIMD decoding path
+// and hence cost some performance.
+//
+// If for some reason you do not want to use any of SIMD code, or if
+// you have issues compiling it, you can disable it entirely by
+// defining STBI_NO_SIMD.
//
// ===========================================================================
//
@@ -167,7 +340,7 @@
// (linear) floats to preserve the full dynamic range:
//
// float *data = stbi_loadf(filename, &x, &y, &n, 0);
-//
+//
// If you load LDR images through this interface, those images will
// be promoted to floating point values, run through the inverse of
// constants corresponding to the above:
@@ -184,17 +357,22 @@
//
// ===========================================================================
//
-// I/O callbacks
+// iPhone PNG support:
//
-// I/O callbacks allow you to read from arbitrary sources, like packaged
-// files or some other source. Data read from callbacks are processed
-// through a small internal buffer (currently 128 bytes) to try to reduce
-// overhead.
+// By default we convert iphone-formatted PNGs back to RGB, even though
+// they are internally encoded differently. You can disable this conversion
+// by by calling stbi_convert_iphone_png_to_rgb(0), in which case
+// you will always just get the native iphone "format" through (which
+// is BGR stored in RGB).
+//
+// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
+// pixel to remove any premultiplied alpha *only* if the image file explicitly
+// says there's premultiplied data (currently only happens in iPhone images,
+// and only if iPhone convert-to-rgb processing is on).
//
-// The three functions you must define are "read" (reads some bytes of data),
-// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
#define STBI_NO_HDR // RaySan: not required by raylib
+#define STBI_NO_SIMD // RaySan: issues when compiling with GCC 4.7.2
#ifndef STBI_NO_STDIO
#include <stdio.h>
@@ -238,41 +416,43 @@ extern "C" {
// load image by filename, open file, or memory buffer
//
-STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
-
-#ifndef STBI_NO_STDIO
-STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *comp, int req_comp);
-STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp);
-// for stbi_load_from_file, file pointer is left pointing immediately after image
-#endif
-
typedef struct
{
- int (*read) (void *user,char *data,int size); // fill 'data' with 'size' bytes. return number of bytes actually read
+ int (*read) (void *user,char *data,int size); // fill 'data' with 'size' bytes. return number of bytes actually read
void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
int (*eof) (void *user); // returns nonzero if we are at end of file/data
} stbi_io_callbacks;
-STBIDEF stbi_uc *stbi_load_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp);
+STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *comp, int req_comp);
+STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *comp, int req_comp);
+STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *comp, int req_comp);
-#ifndef STBI_NO_HDR
- STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
+#ifndef STBI_NO_STDIO
+STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp);
+// for stbi_load_from_file, file pointer is left pointing immediately after image
+#endif
+
+#ifndef STBI_NO_LINEAR
+ STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *comp, int req_comp);
+ STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
+ STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp);
#ifndef STBI_NO_STDIO
- STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *comp, int req_comp);
STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *comp, int req_comp);
#endif
-
- STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp);
+#endif
+#ifndef STBI_NO_HDR
STBIDEF void stbi_hdr_to_ldr_gamma(float gamma);
STBIDEF void stbi_hdr_to_ldr_scale(float scale);
+#endif
+#ifndef STBI_NO_LINEAR
STBIDEF void stbi_ldr_to_hdr_gamma(float gamma);
STBIDEF void stbi_ldr_to_hdr_scale(float scale);
#endif // STBI_NO_HDR
-// stbi_is_hdr is always defined
+// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
#ifndef STBI_NO_STDIO
@@ -283,7 +463,7 @@ STBIDEF int stbi_is_hdr_from_file(FILE *f);
// get a VERY brief reason for failure
// NOT THREADSAFE
-STBIDEF const char *stbi_failure_reason (void);
+STBIDEF const char *stbi_failure_reason (void);
// free the loaded image -- this is just free()
STBIDEF void stbi_image_free (void *retval_from_stbi_load);
@@ -321,26 +501,6 @@ STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int
STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
-// define faster low-level operations (typically SIMD support)
-#ifdef STBI_SIMD
-typedef void (*stbi_idct_8x8)(stbi_uc *out, int out_stride, short data[64], unsigned short *dequantize);
-// compute an integer IDCT on "input"
-// input[x] = data[x] * dequantize[x]
-// write results to 'out': 64 samples, each run of 8 spaced by 'out_stride'
-// CLAMP results to 0..255
-typedef void (*stbi_YCbCr_to_RGB_run)(stbi_uc *output, stbi_uc const *y, stbi_uc const *cb, stbi_uc const *cr, int count, int step);
-// compute a conversion from YCbCr to RGB
-// 'count' pixels
-// write pixels to 'output'; each pixel is 'step' bytes (either 3 or 4; if 4, write '255' as 4th), order R,G,B
-// y: Y input channel
-// cb: Cb input channel; scale/biased to be 0..255
-// cr: Cr input channel; scale/biased to be 0..255
-
-STBIDEF void stbi_install_idct(stbi_idct_8x8 func);
-STBIDEF void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func);
-#endif // STBI_SIMD
-
-
#ifdef __cplusplus
}
#endif
@@ -352,22 +512,62 @@ STBIDEF void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func);
#ifdef STB_IMAGE_IMPLEMENTATION
-#ifndef STBI_NO_HDR
+#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
+ || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
+ || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
+ || defined(STBI_ONLY_ZLIB)
+ #ifndef STBI_ONLY_JPEG
+ #define STBI_NO_JPEG
+ #endif
+ #ifndef STBI_ONLY_PNG
+ #define STBI_NO_PNG
+ #endif
+ #ifndef STBI_ONLY_BMP
+ #define STBI_NO_BMP
+ #endif
+ #ifndef STBI_ONLY_PSD
+ #define STBI_NO_PSD
+ #endif
+ #ifndef STBI_ONLY_TGA
+ #define STBI_NO_TGA
+ #endif
+ #ifndef STBI_ONLY_GIF
+ #define STBI_NO_GIF
+ #endif
+ #ifndef STBI_ONLY_HDR
+ #define STBI_NO_HDR
+ #endif
+ #ifndef STBI_ONLY_PIC
+ #define STBI_NO_PIC
+ #endif
+ #ifndef STBI_ONLY_PNM
+ #define STBI_NO_PNM
+ #endif
+#endif
+
+#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
+#define STBI_NO_ZLIB
+#endif
+
+
+#include <stdarg.h>
+#include <stddef.h> // ptrdiff_t on osx
+#include <stdlib.h>
+#include <string.h>
+
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
#include <math.h> // ldexp
-#include <string.h> // strcmp, strtok
#endif
#ifndef STBI_NO_STDIO
#include <stdio.h>
#endif
-#include <stdlib.h>
-#include <string.h>
+
#ifndef STBI_ASSERT
#include <assert.h>
#define STBI_ASSERT(x) assert(x)
#endif
-#include <stdarg.h>
-#include <stddef.h> // ptrdiff_t on osx
+
#ifndef _MSC_VER
#ifdef __cplusplus
@@ -412,6 +612,86 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
#define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (32 - (y))))
#endif
+#if defined(STBI_MALLOC) && defined(STBI_FREE) && defined(STBI_REALLOC)
+// ok
+#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC)
+// ok
+#else
+#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC."
+#endif
+
+#ifndef STBI_MALLOC
+#define STBI_MALLOC(sz) malloc(sz)
+#define STBI_REALLOC(p,sz) realloc(p,sz)
+#define STBI_FREE(p) free(p)
+#endif
+
+#if !defined(STBI_NO_SIMD) && (defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86))
+#define STBI_SSE2
+#include <emmintrin.h>
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1400 // not VC6
+#include <intrin.h> // __cpuid
+static int stbi__cpuid3(void)
+{
+ int info[4];
+ __cpuid(info,1);
+ return info[3];
+}
+#else
+static int stbi__cpuid3(void)
+{
+ int res;
+ __asm {
+ mov eax,1
+ cpuid
+ mov res,edx
+ }
+ return res;
+}
+#endif
+
+#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
+
+static int stbi__sse2_available()
+{
+ int info3 = stbi__cpuid3();
+ return ((info3 >> 26) & 1) != 0;
+}
+#else // assume GCC-style if not VC++
+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+
+static int stbi__sse2_available()
+{
+#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 // GCC 4.8 or later
+ // GCC 4.8+ has a nice way to do this
+ return __builtin_cpu_supports("sse2");
+#else
+ // portable way to do this, preferably without using GCC inline ASM?
+ // just bail for now.
+ return 0;
+#endif
+}
+#endif
+#endif
+
+// ARM NEON
+#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
+#undef STBI_NEON
+#endif
+
+#ifdef STBI_NEON
+#include <arm_neon.h>
+// assume GCC or Clang on ARM targets
+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+#endif
+
+#ifndef STBI_SIMD_ALIGN
+#define STBI_SIMD_ALIGN(type, name) type name
+#endif
+
///////////////////////////////////////////////
//
// stbi__context struct and start_xxx functions
@@ -422,7 +702,7 @@ typedef struct
{
stbi__uint32 img_x, img_y;
int img_n, img_out_n;
-
+
stbi_io_callbacks io;
void *io_user_data;
@@ -498,29 +778,59 @@ static void stbi__rewind(stbi__context *s)
s->img_buffer = s->img_buffer_original;
}
+#ifndef STBI_NO_JPEG
static int stbi__jpeg_test(stbi__context *s);
static stbi_uc *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_PNG
static int stbi__png_test(stbi__context *s);
static stbi_uc *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_BMP
static int stbi__bmp_test(stbi__context *s);
static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_TGA
static int stbi__tga_test(stbi__context *s);
static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_PSD
static int stbi__psd_test(stbi__context *s);
static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
#ifndef STBI_NO_HDR
static int stbi__hdr_test(stbi__context *s);
static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
#endif
+
+#ifndef STBI_NO_PIC
static int stbi__pic_test(stbi__context *s);
static stbi_uc *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_GIF
static int stbi__gif_test(stbi__context *s);
static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+#ifndef STBI_NO_PNM
+static int stbi__pnm_test(stbi__context *s);
+static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
// this is not threadsafe
static const char *stbi__g_failure_reason;
@@ -538,7 +848,7 @@ static int stbi__err(const char *str)
static void *stbi__malloc(size_t size)
{
- return malloc(size);
+ return STBI_MALLOC(size);
}
// stbi__err - error
@@ -558,22 +868,40 @@ static void *stbi__malloc(size_t size)
STBIDEF void stbi_image_free(void *retval_from_stbi_load)
{
- free(retval_from_stbi_load);
+ STBI_FREE(retval_from_stbi_load);
}
-#ifndef STBI_NO_HDR
+#ifndef STBI_NO_LINEAR
static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
+#endif
+
+#ifndef STBI_NO_HDR
static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp);
#endif
static unsigned char *stbi_load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
+ #ifndef STBI_NO_JPEG
if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp);
+ #endif
+ #ifndef STBI_NO_PNG
if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp);
+ #endif
+ #ifndef STBI_NO_BMP
if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp);
+ #endif
+ #ifndef STBI_NO_GIF
if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp);
+ #endif
+ #ifndef STBI_NO_PSD
if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp);
+ #endif
+ #ifndef STBI_NO_PIC
if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp);
+ #endif
+ #ifndef STBI_NO_PNM
+ if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp);
+ #endif
#ifndef STBI_NO_HDR
if (stbi__hdr_test(s)) {
@@ -582,15 +910,18 @@ static unsigned char *stbi_load_main(stbi__context *s, int *x, int *y, int *comp
}
#endif
+ #ifndef STBI_NO_TGA
// test tga last because it's a crappy test!
if (stbi__tga_test(s))
return stbi__tga_load(s,x,y,comp,req_comp);
+ #endif
+
return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
}
#ifndef STBI_NO_STDIO
-FILE *stbi__fopen(char const *filename, char const *mode)
+static FILE *stbi__fopen(char const *filename, char const *mode)
{
FILE *f;
#if defined(_MSC_VER) && _MSC_VER >= 1400
@@ -603,7 +934,7 @@ FILE *stbi__fopen(char const *filename, char const *mode)
}
-STBIDEF unsigned char *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
+STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
FILE *f = stbi__fopen(filename, "rb");
unsigned char *result;
@@ -613,7 +944,7 @@ STBIDEF unsigned char *stbi_load(char const *filename, int *x, int *y, int *comp
return result;
}
-STBIDEF unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
+STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
{
unsigned char *result;
stbi__context s;
@@ -627,23 +958,22 @@ STBIDEF unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, i
}
#endif //!STBI_NO_STDIO
-STBIDEF unsigned char *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
+STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
stbi__context s;
stbi__start_mem(&s,buffer,len);
return stbi_load_main(&s,x,y,comp,req_comp);
}
-unsigned char *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
+STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
{
stbi__context s;
stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
return stbi_load_main(&s,x,y,comp,req_comp);
}
-#ifndef STBI_NO_HDR
-
-float *stbi_loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+#ifndef STBI_NO_LINEAR
+static float *stbi_loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
unsigned char *data;
#ifndef STBI_NO_HDR
@@ -656,14 +986,14 @@ float *stbi_loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp
return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
}
-float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
+STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
stbi__context s;
stbi__start_mem(&s,buffer,len);
return stbi_loadf_main(&s,x,y,comp,req_comp);
}
-float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
+STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
{
stbi__context s;
stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
@@ -671,7 +1001,7 @@ float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int
}
#ifndef STBI_NO_STDIO
-float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
+STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
{
float *result;
FILE *f = stbi__fopen(filename, "rb");
@@ -681,7 +1011,7 @@ float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
return result;
}
-float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
+STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
{
stbi__context s;
stbi__start_file(&s,f);
@@ -689,13 +1019,13 @@ float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
}
#endif // !STBI_NO_STDIO
-#endif // !STBI_NO_HDR
+#endif // !STBI_NO_LINEAR
-// these is-hdr-or-not is defined independent of whether STBI_NO_HDR is
-// defined, for API simplicity; if STBI_NO_HDR is defined, it always
+// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is
+// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always
// reports false!
-int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
+STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
{
#ifndef STBI_NO_HDR
stbi__context s;
@@ -743,17 +1073,17 @@ STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void
#endif
}
-#ifndef STBI_NO_HDR
static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
-void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
-void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
-
-void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
-void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
+#ifndef STBI_NO_LINEAR
+STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
+STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
#endif
+STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
+STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
+
//////////////////////////////////////////////////////////////////////////////
//
@@ -762,9 +1092,9 @@ void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
enum
{
- SCAN_load=0,
- SCAN_type,
- SCAN_header
+ STBI__SCAN_load=0,
+ STBI__SCAN_type,
+ STBI__SCAN_header
};
static void stbi__refill_buffer(stbi__context *s)
@@ -803,7 +1133,7 @@ stbi_inline static int stbi__at_eof(stbi__context *s)
if (s->read_from_callbacks == 0) return 1;
}
- return s->img_buffer >= s->img_buffer_end;
+ return s->img_buffer >= s->img_buffer_end;
}
static void stbi__skip(stbi__context *s, int n)
@@ -827,7 +1157,7 @@ static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
int res, count;
memcpy(buffer, s->img_buffer, blen);
-
+
count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
res = (count == (n-blen));
s->img_buffer = s->img_buffer_end;
@@ -867,6 +1197,9 @@ static stbi__uint32 stbi__get32le(stbi__context *s)
return z + (stbi__get16le(s) << 16);
}
+#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings
+
+
//////////////////////////////////////////////////////////////////////////////
//
// generic converter from built-in img_n to req_comp
@@ -893,7 +1226,7 @@ static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int r
good = (unsigned char *) stbi__malloc(req_comp * x * y);
if (good == NULL) {
- free(data);
+ STBI_FREE(data);
return stbi__errpuc("outofmem", "Out of memory");
}
@@ -923,16 +1256,16 @@ static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int r
#undef CASE
}
- free(data);
+ STBI_FREE(data);
return good;
}
-#ifndef STBI_NO_HDR
+#ifndef STBI_NO_LINEAR
static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
{
int i,k,n;
float *output = (float *) stbi__malloc(x * y * comp * sizeof(float));
- if (output == NULL) { free(data); return stbi__errpf("outofmem", "Out of memory"); }
+ if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
// compute number of non-alpha components
if (comp & 1) n = comp; else n = comp-1;
for (i=0; i < x*y; ++i) {
@@ -941,16 +1274,18 @@ static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
}
if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
}
- free(data);
+ STBI_FREE(data);
return output;
}
+#endif
+#ifndef STBI_NO_HDR
#define stbi__float2int(x) ((int) (x))
static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
{
int i,k,n;
stbi_uc *output = (stbi_uc *) stbi__malloc(x * y * comp);
- if (output == NULL) { free(data); return stbi__errpuc("outofmem", "Out of memory"); }
+ if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
// compute number of non-alpha components
if (comp & 1) n = comp; else n = comp-1;
for (i=0; i < x*y; ++i) {
@@ -967,17 +1302,16 @@ static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
output[i*comp + k] = (stbi_uc) stbi__float2int(z);
}
}
- free(data);
+ STBI_FREE(data);
return output;
}
#endif
//////////////////////////////////////////////////////////////////////////////
//
-// "baseline" JPEG/JFIF decoder (not actually fully baseline implementation)
+// "baseline" JPEG/JFIF decoder
//
// simple implementation
-// - channel subsampling of at most 2 in each dimension
// - doesn't support delayed output of y-dimension
// - simple interface (only one output format: 8-bit interleaved RGB)
// - doesn't try to recover corrupt jpegs
@@ -991,13 +1325,10 @@ static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
// - quality integer IDCT derived from IJG's 'slow'
// performance
// - fast huffman; reasonable integer IDCT
+// - some SIMD kernels for common paths on targets with SSE2/NEON
// - uses a lot of intermediate memory, could cache poorly
-// - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4
-// stb_jpeg: 1.34 seconds (MSVC6, default release build)
-// stb_jpeg: 1.06 seconds (MSVC6, processor = Pentium Pro)
-// IJL11.dll: 1.08 seconds (compiled by intel)
-// IJG 1998: 0.98 seconds (MSVC6, makefile provided by IJG)
-// IJG 1998: 0.95 seconds (MSVC6, makefile + proc=PPro)
+
+#ifndef STBI_NO_JPEG
// huffman decoding acceleration
#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache
@@ -1015,13 +1346,11 @@ typedef struct
typedef struct
{
- #ifdef STBI_SIMD
- unsigned short dequant2[4][64];
- #endif
stbi__context *s;
stbi__huffman huff_dc[4];
stbi__huffman huff_ac[4];
stbi_uc dequant[4][64];
+ stbi__int16 fast_ac[4][1 << FAST_BITS];
// sizes for components, interleaved MCUs
int img_h_max, img_v_max;
@@ -1039,17 +1368,31 @@ typedef struct
int x,y,w2,h2;
stbi_uc *data;
- void *raw_data;
+ void *raw_data, *raw_coeff;
stbi_uc *linebuf;
+ short *coeff; // progressive only
+ int coeff_w, coeff_h; // number of 8x8 coefficient blocks
} img_comp[4];
- stbi__uint32 code_buffer; // jpeg entropy-coded buffer
+ stbi__uint32 code_buffer; // jpeg entropy-coded buffer
int code_bits; // number of valid bits
unsigned char marker; // marker seen while filling entropy buffer
int nomore; // flag if we saw a marker so must stop
+ int progressive;
+ int spec_start;
+ int spec_end;
+ int succ_high;
+ int succ_low;
+ int eob_run;
+
int scan_n, order[4];
int restart_interval, todo;
+
+// kernels
+ void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
+ void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
+ stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
} stbi__jpeg;
static int stbi__build_huffman(stbi__huffman *h, int *count)
@@ -1093,6 +1436,33 @@ static int stbi__build_huffman(stbi__huffman *h, int *count)
return 1;
}
+// build a table that decodes both magnitude and value of small ACs in
+// one go.
+static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
+{
+ int i;
+ for (i=0; i < (1 << FAST_BITS); ++i) {
+ stbi_uc fast = h->fast[i];
+ fast_ac[i] = 0;
+ if (fast < 255) {
+ int rs = h->values[fast];
+ int run = (rs >> 4) & 15;
+ int magbits = rs & 15;
+ int len = h->size[fast];
+
+ if (magbits && len + magbits <= FAST_BITS) {
+ // magnitude code followed by receive_extend code
+ int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
+ int m = 1 << (magbits - 1);
+ if (k < m) k += (-1 << magbits) + 1;
+ // if the result is small enough, we can fit it in fast_ac table
+ if (k >= -128 && k <= 127)
+ fast_ac[i] = (stbi__int16) ((k << 8) + (run << 4) + (len + magbits));
+ }
+ }
+ }
+}
+
static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
{
do {
@@ -1163,31 +1533,45 @@ stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
return h->values[c];
}
+// bias[n] = (-1<<n) + 1
+static int const stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
+
// combined JPEG 'receive' and JPEG 'extend', since baseline
// always extends everything it receives.
stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
{
- unsigned int m = 1 << (n-1);
unsigned int k;
+ int sgn;
if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
- #if 1
+ sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
k = stbi_lrot(j->code_buffer, n);
j->code_buffer = k & ~stbi__bmask[n];
k &= stbi__bmask[n];
j->code_bits -= n;
- #else
- k = (j->code_buffer >> (32 - n)) & stbi__bmask[n];
+ return k + (stbi__jbias[n] & ~sgn);
+}
+
+// get some unsigned bits
+stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
+{
+ unsigned int k;
+ if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
+ k = stbi_lrot(j->code_buffer, n);
+ j->code_buffer = k & ~stbi__bmask[n];
+ k &= stbi__bmask[n];
j->code_bits -= n;
- j->code_buffer <<= n;
- #endif
- // the following test is probably a random branch that won't
- // predict well. I tried to table accelerate it but failed.
- // maybe it's compiling as a conditional move?
- if (k < m)
- return (-1 << n) + k + 1;
- else
- return k;
+ return k;
+}
+
+stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
+{
+ unsigned int k;
+ if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
+ k = j->code_buffer;
+ j->code_buffer <<= 1;
+ --j->code_bits;
+ return k & 0x80000000;
}
// given a value that's at position X in the zigzag stream,
@@ -1208,10 +1592,13 @@ static stbi_uc stbi__jpeg_dezigzag[64+15] =
};
// decode one 64-entry block--
-static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, int b)
+static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi_uc *dequant)
{
int diff,dc,k;
- int t = stbi__jpeg_huff_decode(j, hdc);
+ int t;
+
+ if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+ t = stbi__jpeg_huff_decode(j, hdc);
if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
// 0 all the ac values now so we can do it 32-bits at a time
@@ -1220,28 +1607,195 @@ static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman
diff = t ? stbi__extend_receive(j, t) : 0;
dc = j->img_comp[b].dc_pred + diff;
j->img_comp[b].dc_pred = dc;
- data[0] = (short) dc;
+ data[0] = (short) (dc * dequant[0]);
// decode AC components, see JPEG spec
k = 1;
do {
- int r,s;
- int rs = stbi__jpeg_huff_decode(j, hac);
- if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
- s = rs & 15;
- r = rs >> 4;
- if (s == 0) {
- if (rs != 0xf0) break; // end block
- k += 16;
- } else {
- k += r;
+ unsigned int zig;
+ int c,r,s;
+ if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+ c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
+ r = fac[c];
+ if (r) { // fast-AC path
+ k += (r >> 4) & 15; // run
+ s = r & 15; // combined length
+ j->code_buffer <<= s;
+ j->code_bits -= s;
// decode into unzigzag'd location
- data[stbi__jpeg_dezigzag[k++]] = (short) stbi__extend_receive(j,s);
+ zig = stbi__jpeg_dezigzag[k++];
+ data[zig] = (short) ((r >> 8) * dequant[zig]);
+ } else {
+ int rs = stbi__jpeg_huff_decode(j, hac);
+ if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
+ s = rs & 15;
+ r = rs >> 4;
+ if (s == 0) {
+ if (rs != 0xf0) break; // end block
+ k += 16;
+ } else {
+ k += r;
+ // decode into unzigzag'd location
+ zig = stbi__jpeg_dezigzag[k++];
+ data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
+ }
}
} while (k < 64);
return 1;
}
+static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
+{
+ int diff,dc;
+ int t;
+ if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+
+ if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+
+ if (j->succ_high == 0) {
+ // first scan for DC coefficient, must be first
+ memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
+ t = stbi__jpeg_huff_decode(j, hdc);
+ diff = t ? stbi__extend_receive(j, t) : 0;
+
+ dc = j->img_comp[b].dc_pred + diff;
+ j->img_comp[b].dc_pred = dc;
+ data[0] = (short) (dc << j->succ_low);
+ } else {
+ // refinement scan for DC coefficient
+ if (stbi__jpeg_get_bit(j))
+ data[0] += (short) (1 << j->succ_low);
+ }
+ return 1;
+}
+
+// @OPTIMIZE: store non-zigzagged during the decode passes,
+// and only de-zigzag when dequantizing
+static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
+{
+ int k;
+ if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+
+ if (j->succ_high == 0) {
+ int shift = j->succ_low;
+
+ if (j->eob_run) {
+ --j->eob_run;
+ return 1;
+ }
+
+ k = j->spec_start;
+ do {
+ unsigned int zig;
+ int c,r,s;
+ if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+ c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
+ r = fac[c];
+ if (r) { // fast-AC path
+ k += (r >> 4) & 15; // run
+ s = r & 15; // combined length
+ j->code_buffer <<= s;
+ j->code_bits -= s;
+ zig = stbi__jpeg_dezigzag[k++];
+ data[zig] = (short) ((r >> 8) << shift);
+ } else {
+ int rs = stbi__jpeg_huff_decode(j, hac);
+ if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
+ s = rs & 15;
+ r = rs >> 4;
+ if (s == 0) {
+ if (r < 15) {
+ j->eob_run = (1 << r);
+ if (r)
+ j->eob_run += stbi__jpeg_get_bits(j, r);
+ --j->eob_run;
+ break;
+ }
+ k += 16;
+ } else {
+ k += r;
+ zig = stbi__jpeg_dezigzag[k++];
+ data[zig] = (short) (stbi__extend_receive(j,s) << shift);
+ }
+ }
+ } while (k <= j->spec_end);
+ } else {
+ // refinement scan for these AC coefficients
+
+ short bit = (short) (1 << j->succ_low);
+
+ if (j->eob_run) {
+ --j->eob_run;
+ for (k = j->spec_start; k <= j->spec_end; ++k) {
+ short *p = &data[stbi__jpeg_dezigzag[k]];
+ if (*p != 0)
+ if (stbi__jpeg_get_bit(j))
+ {
+ if ((*p & bit)==0)
+ {
+ if (*p > 0)
+ *p += bit;
+ else
+ *p -= bit;
+ }
+ }
+ }
+ } else {
+ k = j->spec_start;
+ do {
+ int r,s;
+ int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
+ if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
+ s = rs & 15;
+ r = rs >> 4;
+ if (s == 0) {
+ if (r < 15) {
+ j->eob_run = (1 << r) - 1;
+ if (r)
+ j->eob_run += stbi__jpeg_get_bits(j, r);
+ r = 64; // force end of block
+ } else
+ r = 16; // r=15 is the code for 16 0s
+ } else {
+ if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
+ // sign bit
+ if (stbi__jpeg_get_bit(j))
+ s = bit;
+ else
+ s = -bit;
+ }
+
+ // advance by r
+ while (k <= j->spec_end) {
+ short *p = &data[stbi__jpeg_dezigzag[k]];
+ if (*p != 0) {
+ if (stbi__jpeg_get_bit(j))
+ {
+ if ((*p & bit)==0)
+ {
+ if (*p > 0)
+ *p += bit;
+ else
+ *p -= bit;
+ }
+ }
+ ++k;
+ } else {
+ if (r == 0) {
+ if (s)
+ data[stbi__jpeg_dezigzag[k++]] = s;
+ break;
+ }
+ --r;
+ ++k;
+ }
+ }
+ } while (k <= j->spec_end);
+ }
+ }
+ return 1;
+}
+
// take a -128..127 value and stbi__clamp it and convert to 0..255
stbi_inline static stbi_uc stbi__clamp(int x)
{
@@ -1253,21 +1807,21 @@ stbi_inline static stbi_uc stbi__clamp(int x)
return (stbi_uc) x;
}
-#define stbi__f2f(x) (int) (((x) * 4096 + 0.5))
+#define stbi__f2f(x) ((int) (((x) * 4096 + 0.5)))
#define stbi__fsh(x) ((x) << 12)
// derived from jidctint -- DCT_ISLOW
-#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
+#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
p2 = s2; \
p3 = s6; \
- p1 = (p2+p3) * stbi__f2f(0.5411961f); \
- t2 = p1 + p3*stbi__f2f(-1.847759065f); \
- t3 = p1 + p2*stbi__f2f( 0.765366865f); \
+ p1 = (p2+p3) * stbi__f2f(0.5411961f); \
+ t2 = p1 + p3*stbi__f2f(-1.847759065f); \
+ t3 = p1 + p2*stbi__f2f( 0.765366865f); \
p2 = s0; \
p3 = s4; \
- t0 = stbi__fsh(p2+p3); \
- t1 = stbi__fsh(p2-p3); \
+ t0 = stbi__fsh(p2+p3); \
+ t1 = stbi__fsh(p2-p3); \
x0 = t0+t3; \
x3 = t0-t3; \
x1 = t1+t2; \
@@ -1280,36 +1834,28 @@ stbi_inline static stbi_uc stbi__clamp(int x)
p4 = t1+t3; \
p1 = t0+t3; \
p2 = t1+t2; \
- p5 = (p3+p4)*stbi__f2f( 1.175875602f); \
- t0 = t0*stbi__f2f( 0.298631336f); \
- t1 = t1*stbi__f2f( 2.053119869f); \
- t2 = t2*stbi__f2f( 3.072711026f); \
- t3 = t3*stbi__f2f( 1.501321110f); \
- p1 = p5 + p1*stbi__f2f(-0.899976223f); \
- p2 = p5 + p2*stbi__f2f(-2.562915447f); \
- p3 = p3*stbi__f2f(-1.961570560f); \
- p4 = p4*stbi__f2f(-0.390180644f); \
+ p5 = (p3+p4)*stbi__f2f( 1.175875602f); \
+ t0 = t0*stbi__f2f( 0.298631336f); \
+ t1 = t1*stbi__f2f( 2.053119869f); \
+ t2 = t2*stbi__f2f( 3.072711026f); \
+ t3 = t3*stbi__f2f( 1.501321110f); \
+ p1 = p5 + p1*stbi__f2f(-0.899976223f); \
+ p2 = p5 + p2*stbi__f2f(-2.562915447f); \
+ p3 = p3*stbi__f2f(-1.961570560f); \
+ p4 = p4*stbi__f2f(-0.390180644f); \
t3 += p1+p4; \
t2 += p2+p3; \
t1 += p2+p4; \
t0 += p1+p3;
-#ifdef STBI_SIMD
-typedef unsigned short stbi_dequantize_t;
-#else
-typedef stbi_uc stbi_dequantize_t;
-#endif
-
-// .344 seconds on 3*anemones.jpg
-static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64], stbi_dequantize_t *dequantize)
+static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
{
int i,val[64],*v=val;
- stbi_dequantize_t *dq = dequantize;
stbi_uc *o;
short *d = data;
// columns
- for (i=0; i < 8; ++i,++d,++dq, ++v) {
+ for (i=0; i < 8; ++i,++d, ++v) {
// if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
&& d[40]==0 && d[48]==0 && d[56]==0) {
@@ -1317,11 +1863,10 @@ static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64], stbi_
// (1|2|3|4|5|6|7)==0 0 seconds
// all separate -0.047 seconds
// 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
- int dcterm = d[0] * dq[0] << 2;
+ int dcterm = d[0] << 2;
v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
} else {
- STBI__IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24],
- d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56])
+ STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
// constants scaled things up by 1<<12; let's bring them back
// down, but keep 2 extra bits of precision
x0 += 512; x1 += 512; x2 += 512; x3 += 512;
@@ -1362,14 +1907,394 @@ static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64], stbi_
}
}
-#ifdef STBI_SIMD
-static stbi_idct_8x8 stbi__idct_installed = stbi__idct_block;
+#ifdef STBI_SSE2
+// sse2 integer IDCT. not the fastest possible implementation but it
+// produces bit-identical results to the generic C version so it's
+// fully "transparent".
+static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
+{
+ // This is constructed to match our regular (generic) integer IDCT exactly.
+ __m128i row0, row1, row2, row3, row4, row5, row6, row7;
+ __m128i tmp;
+
+ // dot product constant: even elems=x, odd elems=y
+ #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
+
+ // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit)
+ // out(1) = c1[even]*x + c1[odd]*y
+ #define dct_rot(out0,out1, x,y,c0,c1) \
+ __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
+ __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
+ __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
+ __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
+ __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
+ __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
+
+ // out = in << 12 (in 16-bit, out 32-bit)
+ #define dct_widen(out, in) \
+ __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
+ __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
+
+ // wide add
+ #define dct_wadd(out, a, b) \
+ __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
+ __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
+
+ // wide sub
+ #define dct_wsub(out, a, b) \
+ __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
+ __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
+
+ // butterfly a/b, add bias, then shift by "s" and pack
+ #define dct_bfly32o(out0, out1, a,b,bias,s) \
+ { \
+ __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
+ __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
+ dct_wadd(sum, abiased, b); \
+ dct_wsub(dif, abiased, b); \
+ out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
+ out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
+ }
-STBIDEF void stbi_install_idct(stbi_idct_8x8 func)
-{
- stbi__idct_installed = func;
+ // 8-bit interleave step (for transposes)
+ #define dct_interleave8(a, b) \
+ tmp = a; \
+ a = _mm_unpacklo_epi8(a, b); \
+ b = _mm_unpackhi_epi8(tmp, b)
+
+ // 16-bit interleave step (for transposes)
+ #define dct_interleave16(a, b) \
+ tmp = a; \
+ a = _mm_unpacklo_epi16(a, b); \
+ b = _mm_unpackhi_epi16(tmp, b)
+
+ #define dct_pass(bias,shift) \
+ { \
+ /* even part */ \
+ dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
+ __m128i sum04 = _mm_add_epi16(row0, row4); \
+ __m128i dif04 = _mm_sub_epi16(row0, row4); \
+ dct_widen(t0e, sum04); \
+ dct_widen(t1e, dif04); \
+ dct_wadd(x0, t0e, t3e); \
+ dct_wsub(x3, t0e, t3e); \
+ dct_wadd(x1, t1e, t2e); \
+ dct_wsub(x2, t1e, t2e); \
+ /* odd part */ \
+ dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
+ dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
+ __m128i sum17 = _mm_add_epi16(row1, row7); \
+ __m128i sum35 = _mm_add_epi16(row3, row5); \
+ dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
+ dct_wadd(x4, y0o, y4o); \
+ dct_wadd(x5, y1o, y5o); \
+ dct_wadd(x6, y2o, y5o); \
+ dct_wadd(x7, y3o, y4o); \
+ dct_bfly32o(row0,row7, x0,x7,bias,shift); \
+ dct_bfly32o(row1,row6, x1,x6,bias,shift); \
+ dct_bfly32o(row2,row5, x2,x5,bias,shift); \
+ dct_bfly32o(row3,row4, x3,x4,bias,shift); \
+ }
+
+ __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
+ __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
+ __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
+ __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
+ __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
+ __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
+ __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
+ __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
+
+ // rounding biases in column/row passes, see stbi__idct_block for explanation.
+ __m128i bias_0 = _mm_set1_epi32(512);
+ __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
+
+ // load
+ row0 = _mm_load_si128((const __m128i *) (data + 0*8));
+ row1 = _mm_load_si128((const __m128i *) (data + 1*8));
+ row2 = _mm_load_si128((const __m128i *) (data + 2*8));
+ row3 = _mm_load_si128((const __m128i *) (data + 3*8));
+ row4 = _mm_load_si128((const __m128i *) (data + 4*8));
+ row5 = _mm_load_si128((const __m128i *) (data + 5*8));
+ row6 = _mm_load_si128((const __m128i *) (data + 6*8));
+ row7 = _mm_load_si128((const __m128i *) (data + 7*8));
+
+ // column pass
+ dct_pass(bias_0, 10);
+
+ {
+ // 16bit 8x8 transpose pass 1
+ dct_interleave16(row0, row4);
+ dct_interleave16(row1, row5);
+ dct_interleave16(row2, row6);
+ dct_interleave16(row3, row7);
+
+ // transpose pass 2
+ dct_interleave16(row0, row2);
+ dct_interleave16(row1, row3);
+ dct_interleave16(row4, row6);
+ dct_interleave16(row5, row7);
+
+ // transpose pass 3
+ dct_interleave16(row0, row1);
+ dct_interleave16(row2, row3);
+ dct_interleave16(row4, row5);
+ dct_interleave16(row6, row7);
+ }
+
+ // row pass
+ dct_pass(bias_1, 17);
+
+ {
+ // pack
+ __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
+ __m128i p1 = _mm_packus_epi16(row2, row3);
+ __m128i p2 = _mm_packus_epi16(row4, row5);
+ __m128i p3 = _mm_packus_epi16(row6, row7);
+
+ // 8bit 8x8 transpose pass 1
+ dct_interleave8(p0, p2); // a0e0a1e1...
+ dct_interleave8(p1, p3); // c0g0c1g1...
+
+ // transpose pass 2
+ dct_interleave8(p0, p1); // a0c0e0g0...
+ dct_interleave8(p2, p3); // b0d0f0h0...
+
+ // transpose pass 3
+ dct_interleave8(p0, p2); // a0b0c0d0...
+ dct_interleave8(p1, p3); // a4b4c4d4...
+
+ // store
+ _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
+ _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
+ _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
+ _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
+ _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
+ _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
+ _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
+ _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
+ }
+
+#undef dct_const
+#undef dct_rot
+#undef dct_widen
+#undef dct_wadd
+#undef dct_wsub
+#undef dct_bfly32o
+#undef dct_interleave8
+#undef dct_interleave16
+#undef dct_pass
+}
+
+#endif // STBI_SSE2
+
+#ifdef STBI_NEON
+
+// NEON integer IDCT. should produce bit-identical
+// results to the generic C version.
+static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
+{
+ int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
+
+ int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
+ int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
+ int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
+ int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
+ int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
+ int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
+ int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
+ int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
+ int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
+ int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
+ int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
+ int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
+
+#define dct_long_mul(out, inq, coeff) \
+ int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
+ int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
+
+#define dct_long_mac(out, acc, inq, coeff) \
+ int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
+ int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
+
+#define dct_widen(out, inq) \
+ int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
+ int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
+
+// wide add
+#define dct_wadd(out, a, b) \
+ int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
+ int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
+
+// wide sub
+#define dct_wsub(out, a, b) \
+ int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
+ int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
+
+// butterfly a/b, then shift using "shiftop" by "s" and pack
+#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
+ { \
+ dct_wadd(sum, a, b); \
+ dct_wsub(dif, a, b); \
+ out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
+ out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
+ }
+
+#define dct_pass(shiftop, shift) \
+ { \
+ /* even part */ \
+ int16x8_t sum26 = vaddq_s16(row2, row6); \
+ dct_long_mul(p1e, sum26, rot0_0); \
+ dct_long_mac(t2e, p1e, row6, rot0_1); \
+ dct_long_mac(t3e, p1e, row2, rot0_2); \
+ int16x8_t sum04 = vaddq_s16(row0, row4); \
+ int16x8_t dif04 = vsubq_s16(row0, row4); \
+ dct_widen(t0e, sum04); \
+ dct_widen(t1e, dif04); \
+ dct_wadd(x0, t0e, t3e); \
+ dct_wsub(x3, t0e, t3e); \
+ dct_wadd(x1, t1e, t2e); \
+ dct_wsub(x2, t1e, t2e); \
+ /* odd part */ \
+ int16x8_t sum15 = vaddq_s16(row1, row5); \
+ int16x8_t sum17 = vaddq_s16(row1, row7); \
+ int16x8_t sum35 = vaddq_s16(row3, row5); \
+ int16x8_t sum37 = vaddq_s16(row3, row7); \
+ int16x8_t sumodd = vaddq_s16(sum17, sum35); \
+ dct_long_mul(p5o, sumodd, rot1_0); \
+ dct_long_mac(p1o, p5o, sum17, rot1_1); \
+ dct_long_mac(p2o, p5o, sum35, rot1_2); \
+ dct_long_mul(p3o, sum37, rot2_0); \
+ dct_long_mul(p4o, sum15, rot2_1); \
+ dct_wadd(sump13o, p1o, p3o); \
+ dct_wadd(sump24o, p2o, p4o); \
+ dct_wadd(sump23o, p2o, p3o); \
+ dct_wadd(sump14o, p1o, p4o); \
+ dct_long_mac(x4, sump13o, row7, rot3_0); \
+ dct_long_mac(x5, sump24o, row5, rot3_1); \
+ dct_long_mac(x6, sump23o, row3, rot3_2); \
+ dct_long_mac(x7, sump14o, row1, rot3_3); \
+ dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
+ dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
+ dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
+ dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
+ }
+
+ // load
+ row0 = vld1q_s16(data + 0*8);
+ row1 = vld1q_s16(data + 1*8);
+ row2 = vld1q_s16(data + 2*8);
+ row3 = vld1q_s16(data + 3*8);
+ row4 = vld1q_s16(data + 4*8);
+ row5 = vld1q_s16(data + 5*8);
+ row6 = vld1q_s16(data + 6*8);
+ row7 = vld1q_s16(data + 7*8);
+
+ // add DC bias
+ row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
+
+ // column pass
+ dct_pass(vrshrn_n_s32, 10);
+
+ // 16bit 8x8 transpose
+ {
+// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
+// whether compilers actually get this is another story, sadly.
+#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
+#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
+#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
+
+ // pass 1
+ dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
+ dct_trn16(row2, row3);
+ dct_trn16(row4, row5);
+ dct_trn16(row6, row7);
+
+ // pass 2
+ dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
+ dct_trn32(row1, row3);
+ dct_trn32(row4, row6);
+ dct_trn32(row5, row7);
+
+ // pass 3
+ dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
+ dct_trn64(row1, row5);
+ dct_trn64(row2, row6);
+ dct_trn64(row3, row7);
+
+#undef dct_trn16
+#undef dct_trn32
+#undef dct_trn64
+ }
+
+ // row pass
+ // vrshrn_n_s32 only supports shifts up to 16, we need
+ // 17. so do a non-rounding shift of 16 first then follow
+ // up with a rounding shift by 1.
+ dct_pass(vshrn_n_s32, 16);
+
+ {
+ // pack and round
+ uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
+ uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
+ uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
+ uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
+ uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
+ uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
+ uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
+ uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
+
+ // again, these can translate into one instruction, but often don't.
+#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
+#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
+#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
+
+ // sadly can't use interleaved stores here since we only write
+ // 8 bytes to each scan line!
+
+ // 8x8 8-bit transpose pass 1
+ dct_trn8_8(p0, p1);
+ dct_trn8_8(p2, p3);
+ dct_trn8_8(p4, p5);
+ dct_trn8_8(p6, p7);
+
+ // pass 2
+ dct_trn8_16(p0, p2);
+ dct_trn8_16(p1, p3);
+ dct_trn8_16(p4, p6);
+ dct_trn8_16(p5, p7);
+
+ // pass 3
+ dct_trn8_32(p0, p4);
+ dct_trn8_32(p1, p5);
+ dct_trn8_32(p2, p6);
+ dct_trn8_32(p3, p7);
+
+ // store
+ vst1_u8(out, p0); out += out_stride;
+ vst1_u8(out, p1); out += out_stride;
+ vst1_u8(out, p2); out += out_stride;
+ vst1_u8(out, p3); out += out_stride;
+ vst1_u8(out, p4); out += out_stride;
+ vst1_u8(out, p5); out += out_stride;
+ vst1_u8(out, p6); out += out_stride;
+ vst1_u8(out, p7);
+
+#undef dct_trn8_8
+#undef dct_trn8_16
+#undef dct_trn8_32
+ }
+
+#undef dct_long_mul
+#undef dct_long_mac
+#undef dct_widen
+#undef dct_wadd
+#undef dct_wsub
+#undef dct_bfly32o
+#undef dct_pass
}
-#endif
+
+#endif // STBI_NEON
#define STBI__MARKER_none 0xff
// if there's a pending marker from the entropy stream, return that
@@ -1400,6 +2325,7 @@ static void stbi__jpeg_reset(stbi__jpeg *j)
j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0;
j->marker = STBI__MARKER_none;
j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
+ j->eob_run = 0;
// no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
// since we don't even allow 1<<30 pixels
}
@@ -1407,73 +2333,152 @@ static void stbi__jpeg_reset(stbi__jpeg *j)
static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
{
stbi__jpeg_reset(z);
- if (z->scan_n == 1) {
- int i,j;
- #ifdef STBI_SIMD
- __declspec(align(16))
- #endif
- short data[64];
- int n = z->order[0];
- // non-interleaved data, we just need to process one block at a time,
- // in trivial scanline order
- // number of blocks to do just depends on how many actual "pixels" this
- // component has, independent of interleaved MCU blocking and such
- int w = (z->img_comp[n].x+7) >> 3;
- int h = (z->img_comp[n].y+7) >> 3;
- for (j=0; j < h; ++j) {
- for (i=0; i < w; ++i) {
- if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0;
- #ifdef STBI_SIMD
- stbi__idct_installed(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]);
- #else
- stbi__idct_block(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]);
- #endif
- // every data block is an MCU, so countdown the restart interval
- if (--z->todo <= 0) {
- if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
- // if it's NOT a restart, then just bail, so we get corrupt data
- // rather than no data
- if (!STBI__RESTART(z->marker)) return 1;
- stbi__jpeg_reset(z);
+ if (!z->progressive) {
+ if (z->scan_n == 1) {
+ int i,j;
+ STBI_SIMD_ALIGN(short, data[64]);
+ int n = z->order[0];
+ // non-interleaved data, we just need to process one block at a time,
+ // in trivial scanline order
+ // number of blocks to do just depends on how many actual "pixels" this
+ // component has, independent of interleaved MCU blocking and such
+ int w = (z->img_comp[n].x+7) >> 3;
+ int h = (z->img_comp[n].y+7) >> 3;
+ for (j=0; j < h; ++j) {
+ for (i=0; i < w; ++i) {
+ int ha = z->img_comp[n].ha;
+ if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
+ z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
+ // every data block is an MCU, so countdown the restart interval
+ if (--z->todo <= 0) {
+ if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+ // if it's NOT a restart, then just bail, so we get corrupt data
+ // rather than no data
+ if (!STBI__RESTART(z->marker)) return 1;
+ stbi__jpeg_reset(z);
+ }
+ }
+ }
+ return 1;
+ } else { // interleaved
+ int i,j,k,x,y;
+ STBI_SIMD_ALIGN(short, data[64]);
+ for (j=0; j < z->img_mcu_y; ++j) {
+ for (i=0; i < z->img_mcu_x; ++i) {
+ // scan an interleaved mcu... process scan_n components in order
+ for (k=0; k < z->scan_n; ++k) {
+ int n = z->order[k];
+ // scan out an mcu's worth of this component; that's just determined
+ // by the basic H and V specified for the component
+ for (y=0; y < z->img_comp[n].v; ++y) {
+ for (x=0; x < z->img_comp[n].h; ++x) {
+ int x2 = (i*z->img_comp[n].h + x)*8;
+ int y2 = (j*z->img_comp[n].v + y)*8;
+ int ha = z->img_comp[n].ha;
+ if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
+ z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
+ }
+ }
+ }
+ // after all interleaved components, that's an interleaved MCU,
+ // so now count down the restart interval
+ if (--z->todo <= 0) {
+ if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+ if (!STBI__RESTART(z->marker)) return 1;
+ stbi__jpeg_reset(z);
+ }
}
}
+ return 1;
}
- } else { // interleaved!
- int i,j,k,x,y;
- short data[64];
- for (j=0; j < z->img_mcu_y; ++j) {
- for (i=0; i < z->img_mcu_x; ++i) {
- // scan an interleaved mcu... process scan_n components in order
- for (k=0; k < z->scan_n; ++k) {
- int n = z->order[k];
- // scan out an mcu's worth of this component; that's just determined
- // by the basic H and V specified for the component
- for (y=0; y < z->img_comp[n].v; ++y) {
- for (x=0; x < z->img_comp[n].h; ++x) {
- int x2 = (i*z->img_comp[n].h + x)*8;
- int y2 = (j*z->img_comp[n].v + y)*8;
- if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0;
- #ifdef STBI_SIMD
- stbi__idct_installed(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]);
- #else
- stbi__idct_block(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]);
- #endif
+ } else {
+ if (z->scan_n == 1) {
+ int i,j;
+ int n = z->order[0];
+ // non-interleaved data, we just need to process one block at a time,
+ // in trivial scanline order
+ // number of blocks to do just depends on how many actual "pixels" this
+ // component has, independent of interleaved MCU blocking and such
+ int w = (z->img_comp[n].x+7) >> 3;
+ int h = (z->img_comp[n].y+7) >> 3;
+ for (j=0; j < h; ++j) {
+ for (i=0; i < w; ++i) {
+ short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
+ if (z->spec_start == 0) {
+ if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
+ return 0;
+ } else {
+ int ha = z->img_comp[n].ha;
+ if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
+ return 0;
+ }
+ // every data block is an MCU, so countdown the restart interval
+ if (--z->todo <= 0) {
+ if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+ if (!STBI__RESTART(z->marker)) return 1;
+ stbi__jpeg_reset(z);
+ }
+ }
+ }
+ return 1;
+ } else { // interleaved
+ int i,j,k,x,y;
+ for (j=0; j < z->img_mcu_y; ++j) {
+ for (i=0; i < z->img_mcu_x; ++i) {
+ // scan an interleaved mcu... process scan_n components in order
+ for (k=0; k < z->scan_n; ++k) {
+ int n = z->order[k];
+ // scan out an mcu's worth of this component; that's just determined
+ // by the basic H and V specified for the component
+ for (y=0; y < z->img_comp[n].v; ++y) {
+ for (x=0; x < z->img_comp[n].h; ++x) {
+ int x2 = (i*z->img_comp[n].h + x);
+ int y2 = (j*z->img_comp[n].v + y);
+ //int ha = z->img_comp[n].ha; // RaySan: Unused, commented to avoid warning
+ short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
+ if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
+ return 0;
+ }
}
}
+ // after all interleaved components, that's an interleaved MCU,
+ // so now count down the restart interval
+ if (--z->todo <= 0) {
+ if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+ if (!STBI__RESTART(z->marker)) return 1;
+ stbi__jpeg_reset(z);
+ }
}
- // after all interleaved components, that's an interleaved MCU,
- // so now count down the restart interval
- if (--z->todo <= 0) {
- if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
- // if it's NOT a restart, then just bail, so we get corrupt data
- // rather than no data
- if (!STBI__RESTART(z->marker)) return 1;
- stbi__jpeg_reset(z);
+ }
+ return 1;
+ }
+ }
+}
+
+static void stbi__jpeg_dequantize(short *data, stbi_uc *dequant)
+{
+ int i;
+ for (i=0; i < 64; ++i)
+ data[i] *= dequant[i];
+}
+
+static void stbi__jpeg_finish(stbi__jpeg *z)
+{
+ if (z->progressive) {
+ // dequantize and idct the data
+ int i,j,n;
+ for (n=0; n < z->s->img_n; ++n) {
+ int w = (z->img_comp[n].x+7) >> 3;
+ int h = (z->img_comp[n].y+7) >> 3;
+ for (j=0; j < h; ++j) {
+ for (i=0; i < w; ++i) {
+ short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
+ stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
+ z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
}
}
}
}
- return 1;
}
static int stbi__process_marker(stbi__jpeg *z, int m)
@@ -1483,9 +2488,6 @@ static int stbi__process_marker(stbi__jpeg *z, int m)
case STBI__MARKER_none: // no marker found
return stbi__err("expected marker","Corrupt JPEG");
- case 0xC2: // stbi__SOF - progressive
- return stbi__err("progressive jpeg","JPEG format not supported (progressive)");
-
case 0xDD: // DRI - specify restart interval
if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
z->restart_interval = stbi__get16be(z->s);
@@ -1501,10 +2503,6 @@ static int stbi__process_marker(stbi__jpeg *z, int m)
if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
for (i=0; i < 64; ++i)
z->dequant[t][stbi__jpeg_dezigzag[i]] = stbi__get8(z->s);
- #ifdef STBI_SIMD
- for (i=0; i < 64; ++i)
- z->dequant2[t][i] = z->dequant[t][i];
- #endif
L -= 65;
}
return L==0;
@@ -1532,6 +2530,8 @@ static int stbi__process_marker(stbi__jpeg *z, int m)
}
for (i=0; i < n; ++i)
v[i] = stbi__get8(z->s);
+ if (tc != 0)
+ stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
L -= n;
}
return L==0;
@@ -1544,28 +2544,42 @@ static int stbi__process_marker(stbi__jpeg *z, int m)
return 0;
}
-// after we see stbi__SOS
+// after we see SOS
static int stbi__process_scan_header(stbi__jpeg *z)
{
int i;
int Ls = stbi__get16be(z->s);
z->scan_n = stbi__get8(z->s);
- if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad stbi__SOS component count","Corrupt JPEG");
- if (Ls != 6+2*z->scan_n) return stbi__err("bad stbi__SOS len","Corrupt JPEG");
+ if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
+ if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
for (i=0; i < z->scan_n; ++i) {
int id = stbi__get8(z->s), which;
int q = stbi__get8(z->s);
for (which = 0; which < z->s->img_n; ++which)
if (z->img_comp[which].id == id)
break;
- if (which == z->s->img_n) return 0;
+ if (which == z->s->img_n) return 0; // no match
z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
z->order[i] = which;
}
- if (stbi__get8(z->s) != 0) return stbi__err("bad stbi__SOS","Corrupt JPEG");
- stbi__get8(z->s); // should be 63, but might be 0
- if (stbi__get8(z->s) != 0) return stbi__err("bad stbi__SOS","Corrupt JPEG");
+
+ {
+ int aa;
+ z->spec_start = stbi__get8(z->s);
+ z->spec_end = stbi__get8(z->s); // should be 63, but might be 0
+ aa = stbi__get8(z->s);
+ z->succ_high = (aa >> 4);
+ z->succ_low = (aa & 15);
+ if (z->progressive) {
+ if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
+ return stbi__err("bad SOS", "Corrupt JPEG");
+ } else {
+ if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
+ if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
+ z->spec_end = 63;
+ }
+ }
return 1;
}
@@ -1574,8 +2588,8 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
{
stbi__context *s = z->s;
int Lf,p,i,q, h_max=1,v_max=1,c;
- Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad stbi__SOF len","Corrupt JPEG"); // JPEG
- p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
+ Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
+ p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
c = stbi__get8(s);
@@ -1586,7 +2600,7 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
z->img_comp[i].linebuf = NULL;
}
- if (Lf != 8+3*s->img_n) return stbi__err("bad stbi__SOF len","Corrupt JPEG");
+ if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");
for (i=0; i < s->img_n; ++i) {
z->img_comp[i].id = stbi__get8(s);
@@ -1599,7 +2613,7 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
}
- if (scan != SCAN_load) return 1;
+ if (scan != STBI__SCAN_load) return 1;
if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
@@ -1627,54 +2641,68 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
z->img_comp[i].raw_data = stbi__malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15);
+
if (z->img_comp[i].raw_data == NULL) {
for(--i; i >= 0; --i) {
- free(z->img_comp[i].raw_data);
+ STBI_FREE(z->img_comp[i].raw_data);
z->img_comp[i].data = NULL;
}
return stbi__err("outofmem", "Out of memory");
}
- // align blocks for installable-idct using mmx/sse
+ // align blocks for idct using mmx/sse
z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
z->img_comp[i].linebuf = NULL;
+ if (z->progressive) {
+ z->img_comp[i].coeff_w = (z->img_comp[i].w2 + 7) >> 3;
+ z->img_comp[i].coeff_h = (z->img_comp[i].h2 + 7) >> 3;
+ z->img_comp[i].raw_coeff = STBI_MALLOC(z->img_comp[i].coeff_w * z->img_comp[i].coeff_h * 64 * sizeof(short) + 15);
+ z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
+ } else {
+ z->img_comp[i].coeff = 0;
+ z->img_comp[i].raw_coeff = 0;
+ }
}
return 1;
}
-// use comparisons since in some cases we handle more than one case (e.g. stbi__SOF)
+// use comparisons since in some cases we handle more than one case (e.g. SOF)
#define stbi__DNL(x) ((x) == 0xdc)
#define stbi__SOI(x) ((x) == 0xd8)
#define stbi__EOI(x) ((x) == 0xd9)
-#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1)
+#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
#define stbi__SOS(x) ((x) == 0xda)
-static int decode_jpeg_header(stbi__jpeg *z, int scan)
+#define stbi__SOF_progressive(x) ((x) == 0xc2)
+
+static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
{
int m;
z->marker = STBI__MARKER_none; // initialize cached marker to empty
m = stbi__get_marker(z);
- if (!stbi__SOI(m)) return stbi__err("no stbi__SOI","Corrupt JPEG");
- if (scan == SCAN_type) return 1;
+ if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
+ if (scan == STBI__SCAN_type) return 1;
m = stbi__get_marker(z);
while (!stbi__SOF(m)) {
if (!stbi__process_marker(z,m)) return 0;
m = stbi__get_marker(z);
while (m == STBI__MARKER_none) {
// some files have extra padding after their blocks, so ok, we'll scan
- if (stbi__at_eof(z->s)) return stbi__err("no stbi__SOF", "Corrupt JPEG");
+ if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
m = stbi__get_marker(z);
}
}
+ z->progressive = stbi__SOF_progressive(m);
if (!stbi__process_frame_header(z, scan)) return 0;
return 1;
}
-static int decode_jpeg_image(stbi__jpeg *j)
+// decode image to YCbCr format
+static int stbi__decode_jpeg_image(stbi__jpeg *j)
{
int m;
j->restart_interval = 0;
- if (!decode_jpeg_header(j, SCAN_load)) return 0;
+ if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
m = stbi__get_marker(j);
while (!stbi__EOI(m)) {
if (stbi__SOS(m)) {
@@ -1688,7 +2716,7 @@ static int decode_jpeg_image(stbi__jpeg *j)
j->marker = stbi__get8(j->s);
break;
} else if (x != 0) {
- return 0;
+ return stbi__err("junk before marker", "Corrupt JPEG");
}
}
// if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
@@ -1698,6 +2726,8 @@ static int decode_jpeg_image(stbi__jpeg *j)
}
m = stbi__get_marker(j);
}
+ if (j->progressive)
+ stbi__jpeg_finish(j);
return 1;
}
@@ -1781,6 +2811,123 @@ static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc
return out;
}
+#if defined(STBI_SSE2) || defined(STBI_NEON)
+static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
+{
+ // need to generate 2x2 samples for every one in input
+ int i=0,t0,t1;
+
+ if (w == 1) {
+ out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
+ return out;
+ }
+
+ t1 = 3*in_near[0] + in_far[0];
+ // process groups of 8 pixels for as long as we can.
+ // note we can't handle the last pixel in a row in this loop
+ // because we need to handle the filter boundary conditions.
+ for (; i < ((w-1) & ~7); i += 8) {
+#if defined(STBI_SSE2)
+ // load and perform the vertical filtering pass
+ // this uses 3*x + y = 4*x + (y - x)
+ __m128i zero = _mm_setzero_si128();
+ __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i));
+ __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
+ __m128i farw = _mm_unpacklo_epi8(farb, zero);
+ __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
+ __m128i diff = _mm_sub_epi16(farw, nearw);
+ __m128i nears = _mm_slli_epi16(nearw, 2);
+ __m128i curr = _mm_add_epi16(nears, diff); // current row
+
+ // horizontal filter works the same based on shifted vers of current
+ // row. "prev" is current row shifted right by 1 pixel; we need to
+ // insert the previous pixel value (from t1).
+ // "next" is current row shifted left by 1 pixel, with first pixel
+ // of next block of 8 pixels added in.
+ __m128i prv0 = _mm_slli_si128(curr, 2);
+ __m128i nxt0 = _mm_srli_si128(curr, 2);
+ __m128i prev = _mm_insert_epi16(prv0, t1, 0);
+ __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);
+
+ // horizontal filter, polyphase implementation since it's convenient:
+ // even pixels = 3*cur + prev = cur*4 + (prev - cur)
+ // odd pixels = 3*cur + next = cur*4 + (next - cur)
+ // note the shared term.
+ __m128i bias = _mm_set1_epi16(8);
+ __m128i curs = _mm_slli_epi16(curr, 2);
+ __m128i prvd = _mm_sub_epi16(prev, curr);
+ __m128i nxtd = _mm_sub_epi16(next, curr);
+ __m128i curb = _mm_add_epi16(curs, bias);
+ __m128i even = _mm_add_epi16(prvd, curb);
+ __m128i odd = _mm_add_epi16(nxtd, curb);
+
+ // interleave even and odd pixels, then undo scaling.
+ __m128i int0 = _mm_unpacklo_epi16(even, odd);
+ __m128i int1 = _mm_unpackhi_epi16(even, odd);
+ __m128i de0 = _mm_srli_epi16(int0, 4);
+ __m128i de1 = _mm_srli_epi16(int1, 4);
+
+ // pack and write output
+ __m128i outv = _mm_packus_epi16(de0, de1);
+ _mm_storeu_si128((__m128i *) (out + i*2), outv);
+#elif defined(STBI_NEON)
+ // load and perform the vertical filtering pass
+ // this uses 3*x + y = 4*x + (y - x)
+ uint8x8_t farb = vld1_u8(in_far + i);
+ uint8x8_t nearb = vld1_u8(in_near + i);
+ int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
+ int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
+ int16x8_t curr = vaddq_s16(nears, diff); // current row
+
+ // horizontal filter works the same based on shifted vers of current
+ // row. "prev" is current row shifted right by 1 pixel; we need to
+ // insert the previous pixel value (from t1).
+ // "next" is current row shifted left by 1 pixel, with first pixel
+ // of next block of 8 pixels added in.
+ int16x8_t prv0 = vextq_s16(curr, curr, 7);
+ int16x8_t nxt0 = vextq_s16(curr, curr, 1);
+ int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
+ int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);
+
+ // horizontal filter, polyphase implementation since it's convenient:
+ // even pixels = 3*cur + prev = cur*4 + (prev - cur)
+ // odd pixels = 3*cur + next = cur*4 + (next - cur)
+ // note the shared term.
+ int16x8_t curs = vshlq_n_s16(curr, 2);
+ int16x8_t prvd = vsubq_s16(prev, curr);
+ int16x8_t nxtd = vsubq_s16(next, curr);
+ int16x8_t even = vaddq_s16(curs, prvd);
+ int16x8_t odd = vaddq_s16(curs, nxtd);
+
+ // undo scaling and round, then store with even/odd phases interleaved
+ uint8x8x2_t o;
+ o.val[0] = vqrshrun_n_s16(even, 4);
+ o.val[1] = vqrshrun_n_s16(odd, 4);
+ vst2_u8(out + i*2, o);
+#endif
+
+ // "previous" value for next iter
+ t1 = 3*in_near[i+7] + in_far[i+7];
+ }
+
+ t0 = t1;
+ t1 = 3*in_near[i] + in_far[i];
+ out[i*2] = stbi__div16(3*t1 + t0 + 8);
+
+ for (++i; i < w; ++i) {
+ t0 = t1;
+ t1 = 3*in_near[i]+in_far[i];
+ out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
+ out[i*2 ] = stbi__div16(3*t1 + t0 + 8);
+ }
+ out[w*2-1] = stbi__div4(t1+2);
+
+ STBI_NOTUSED(hs);
+
+ return out;
+}
+#endif
+
static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
// resample with nearest-neighbor
@@ -1792,10 +2939,10 @@ static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_
return out;
}
+#ifdef STBI_JPEG_OLD
+// this is the same YCbCr-to-RGB calculation that stb_image has used
+// historically before the algorithm changes in 1.49
#define float2fixed(x) ((int) ((x) * 65536 + 0.5))
-
-// 0.38 seconds on 3*anemones.jpg (0.25 with processor = Pro)
-// VC6 without processor=Pro is generating multiple LEAs per multiply!
static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
{
int i;
@@ -1820,16 +2967,196 @@ static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc
out += step;
}
}
+#else
+// this is a reduced-precision calculation of YCbCr-to-RGB introduced
+// to make sure the code produces the same results in both SIMD and scalar
+#define float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8)
+static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
+{
+ int i;
+ for (i=0; i < count; ++i) {
+ int y_fixed = (y[i] << 20) + (1<<19); // rounding
+ int r,g,b;
+ int cr = pcr[i] - 128;
+ int cb = pcb[i] - 128;
+ r = y_fixed + cr* float2fixed(1.40200f);
+ g = y_fixed + (cr*-float2fixed(0.71414f)) + ((cb*-float2fixed(0.34414f)) & 0xffff0000);
+ b = y_fixed + cb* float2fixed(1.77200f);
+ r >>= 20;
+ g >>= 20;
+ b >>= 20;
+ if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
+ if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
+ if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
+ out[0] = (stbi_uc)r;
+ out[1] = (stbi_uc)g;
+ out[2] = (stbi_uc)b;
+ out[3] = 255;
+ out += step;
+ }
+}
+#endif
-#ifdef STBI_SIMD
-static stbi_YCbCr_to_RGB_run stbi__YCbCr_installed = stbi__YCbCr_to_RGB_row;
+#if defined(STBI_SSE2) || defined(STBI_NEON)
+static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
+{
+ int i = 0;
+
+#ifdef STBI_SSE2
+ // step == 3 is pretty ugly on the final interleave, and i'm not convinced
+ // it's useful in practice (you wouldn't use it for textures, for example).
+ // so just accelerate step == 4 case.
+ if (step == 4) {
+ // this is a fairly straightforward implementation and not super-optimized.
+ __m128i signflip = _mm_set1_epi8(-0x80);
+ __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f));
+ __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
+ __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
+ __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f));
+ __m128i y_bias = _mm_set1_epi8((char) 128);
+ __m128i xw = _mm_set1_epi16(255); // alpha channel
+
+ for (; i+7 < count; i += 8) {
+ // load
+ __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
+ __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
+ __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
+ __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
+ __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
+
+ // unpack to short (and left-shift cr, cb by 8)
+ __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes);
+ __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
+ __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
+
+ // color transform
+ __m128i yws = _mm_srli_epi16(yw, 4);
+ __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
+ __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
+ __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
+ __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
+ __m128i rws = _mm_add_epi16(cr0, yws);
+ __m128i gwt = _mm_add_epi16(cb0, yws);
+ __m128i bws = _mm_add_epi16(yws, cb1);
+ __m128i gws = _mm_add_epi16(gwt, cr1);
+
+ // descale
+ __m128i rw = _mm_srai_epi16(rws, 4);
+ __m128i bw = _mm_srai_epi16(bws, 4);
+ __m128i gw = _mm_srai_epi16(gws, 4);
+
+ // back to byte, set up for transpose
+ __m128i brb = _mm_packus_epi16(rw, bw);
+ __m128i gxb = _mm_packus_epi16(gw, xw);
+
+ // transpose to interleave channels
+ __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
+ __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
+ __m128i o0 = _mm_unpacklo_epi16(t0, t1);
+ __m128i o1 = _mm_unpackhi_epi16(t0, t1);
+
+ // store
+ _mm_storeu_si128((__m128i *) (out + 0), o0);
+ _mm_storeu_si128((__m128i *) (out + 16), o1);
+ out += 32;
+ }
+ }
+#endif
-STBIDEF void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func)
-{
- stbi__YCbCr_installed = func;
+#ifdef STBI_NEON
+ // in this version, step=3 support would be easy to add. but is there demand?
+ if (step == 4) {
+ // this is a fairly straightforward implementation and not super-optimized.
+ uint8x8_t signflip = vdup_n_u8(0x80);
+ int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f));
+ int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
+ int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
+ int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f));
+
+ for (; i+7 < count; i += 8) {
+ // load
+ uint8x8_t y_bytes = vld1_u8(y + i);
+ uint8x8_t cr_bytes = vld1_u8(pcr + i);
+ uint8x8_t cb_bytes = vld1_u8(pcb + i);
+ int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
+ int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
+
+ // expand to s16
+ int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
+ int16x8_t crw = vshll_n_s8(cr_biased, 7);
+ int16x8_t cbw = vshll_n_s8(cb_biased, 7);
+
+ // color transform
+ int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
+ int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
+ int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
+ int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
+ int16x8_t rws = vaddq_s16(yws, cr0);
+ int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
+ int16x8_t bws = vaddq_s16(yws, cb1);
+
+ // undo scaling, round, convert to byte
+ uint8x8x4_t o;
+ o.val[0] = vqrshrun_n_s16(rws, 4);
+ o.val[1] = vqrshrun_n_s16(gws, 4);
+ o.val[2] = vqrshrun_n_s16(bws, 4);
+ o.val[3] = vdup_n_u8(255);
+
+ // store, interleaving r/g/b/a
+ vst4_u8(out, o);
+ out += 8*4;
+ }
+ }
+#endif
+
+ for (; i < count; ++i) {
+ int y_fixed = (y[i] << 20) + (1<<19); // rounding
+ int r,g,b;
+ int cr = pcr[i] - 128;
+ int cb = pcb[i] - 128;
+ r = y_fixed + cr* float2fixed(1.40200f);
+ g = y_fixed + cr*-float2fixed(0.71414f) + ((cb*-float2fixed(0.34414f)) & 0xffff0000);
+ b = y_fixed + cb* float2fixed(1.77200f);
+ r >>= 20;
+ g >>= 20;
+ b >>= 20;
+ if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
+ if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
+ if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
+ out[0] = (stbi_uc)r;
+ out[1] = (stbi_uc)g;
+ out[2] = (stbi_uc)b;
+ out[3] = 255;
+ out += step;
+ }
}
#endif
+// set up the kernels
+static void stbi__setup_jpeg(stbi__jpeg *j)
+{
+ j->idct_block_kernel = stbi__idct_block;
+ j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
+ j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
+
+#ifdef STBI_SSE2
+ if (stbi__sse2_available()) {
+ j->idct_block_kernel = stbi__idct_simd;
+ #ifndef STBI_JPEG_OLD
+ j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
+ #endif
+ j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
+ }
+#endif
+
+#ifdef STBI_NEON
+ j->idct_block_kernel = stbi__idct_simd;
+ #ifndef STBI_JPEG_OLD
+ j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
+ #endif
+ j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
+#endif
+}
// clean up the temporary component buffers
static void stbi__cleanup_jpeg(stbi__jpeg *j)
@@ -1837,12 +3164,17 @@ static void stbi__cleanup_jpeg(stbi__jpeg *j)
int i;
for (i=0; i < j->s->img_n; ++i) {
if (j->img_comp[i].raw_data) {
- free(j->img_comp[i].raw_data);
+ STBI_FREE(j->img_comp[i].raw_data);
j->img_comp[i].raw_data = NULL;
j->img_comp[i].data = NULL;
}
+ if (j->img_comp[i].raw_coeff) {
+ STBI_FREE(j->img_comp[i].raw_coeff);
+ j->img_comp[i].raw_coeff = 0;
+ j->img_comp[i].coeff = 0;
+ }
if (j->img_comp[i].linebuf) {
- free(j->img_comp[i].linebuf);
+ STBI_FREE(j->img_comp[i].linebuf);
j->img_comp[i].linebuf = NULL;
}
}
@@ -1853,7 +3185,7 @@ typedef struct
resample_row_func resample;
stbi_uc *line0,*line1;
int hs,vs; // expansion factor in each axis
- int w_lores; // horizontal pixels pre-expansion
+ int w_lores; // horizontal pixels pre-expansion
int ystep; // how far through vertical expansion we are
int ypos; // which pre-expansion row we're on
} stbi__resample;
@@ -1866,8 +3198,8 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
// validate req_comp
if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
- // load a jpeg image from whichever source
- if (!decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
+ // load a jpeg image from whichever source, but leave in YCbCr format
+ if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
// determine actual number of components to generate
n = req_comp ? req_comp : z->s->img_n;
@@ -1904,7 +3236,7 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
- else if (r->hs == 2 && r->vs == 2) r->resample = stbi__resample_row_hv_2;
+ else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
else r->resample = stbi__resample_row_generic;
}
@@ -1932,11 +3264,7 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
if (n >= 3) {
stbi_uc *y = coutput[0];
if (z->s->img_n == 3) {
- #ifdef STBI_SIMD
- stbi__YCbCr_installed(out, y, coutput[1], coutput[2], z->s->img_x, n);
- #else
- stbi__YCbCr_to_RGB_row(out, y, coutput[1], coutput[2], z->s->img_x, n);
- #endif
+ z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
} else
for (i=0; i < z->s->img_x; ++i) {
out[0] = out[1] = out[2] = y[i];
@@ -1963,6 +3291,7 @@ static unsigned char *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *com
{
stbi__jpeg j;
j.s = s;
+ stbi__setup_jpeg(&j);
return load_jpeg_image(&j, x,y,comp,req_comp);
}
@@ -1971,14 +3300,15 @@ static int stbi__jpeg_test(stbi__context *s)
int r;
stbi__jpeg j;
j.s = s;
- r = decode_jpeg_header(&j, SCAN_type);
+ stbi__setup_jpeg(&j);
+ r = stbi__decode_jpeg_header(&j, STBI__SCAN_type);
stbi__rewind(s);
return r;
}
static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
{
- if (!decode_jpeg_header(j, SCAN_header)) {
+ if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
stbi__rewind( j->s );
return 0;
}
@@ -1994,6 +3324,7 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
j.s = s;
return stbi__jpeg_info_raw(&j, x, y, comp);
}
+#endif
// public domain zlib decode v0.2 Sean Barrett 2006-11-18
// simple implementation
@@ -2002,6 +3333,8 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
// performance
// - fast huffman
+#ifndef STBI_NO_ZLIB
+
// fast-way is faster to check than jpeg huffman, but slow way is slower
#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables
#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1)
@@ -2015,7 +3348,7 @@ typedef struct
int maxcode[17];
stbi__uint16 firstsymbol[16];
stbi_uc size[288];
- stbi__uint16 value[288];
+ stbi__uint16 value[288];
} stbi__zhuffman;
stbi_inline static int stbi__bitreverse16(int n)
@@ -2042,8 +3375,8 @@ static int stbi__zbuild_huffman(stbi__zhuffman *z, stbi_uc *sizelist, int num)
// DEFLATE spec for generating codes
memset(sizes, 0, sizeof(sizes));
- memset(z->fast, 255, sizeof(z->fast));
- for (i=0; i < num; ++i)
+ memset(z->fast, 0, sizeof(z->fast));
+ for (i=0; i < num; ++i)
++sizes[sizelist[i]];
sizes[0] = 0;
for (i=1; i < 16; ++i)
@@ -2065,12 +3398,13 @@ static int stbi__zbuild_huffman(stbi__zhuffman *z, stbi_uc *sizelist, int num)
int s = sizelist[i];
if (s) {
int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
+ stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
z->size [c] = (stbi_uc ) s;
z->value[c] = (stbi__uint16) i;
if (s <= STBI__ZFAST_BITS) {
int k = stbi__bit_reverse(next_code[s],s);
while (k < (1 << STBI__ZFAST_BITS)) {
- z->fast[k] = (stbi__uint16) c;
+ z->fast[k] = fastv;
k += (1 << s);
}
}
@@ -2122,21 +3456,12 @@ stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
k = z->code_buffer & ((1 << n) - 1);
z->code_buffer >>= n;
z->num_bits -= n;
- return k;
+ return k;
}
-stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
+static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
{
int b,s,k;
- if (a->num_bits < 16) stbi__fill_bits(a);
- b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
- if (b < 0xffff) {
- s = z->size[b];
- a->code_buffer >>= s;
- a->num_bits -= s;
- return z->value[b];
- }
-
// not resolved by fast table, so compute it the slow way
// use jpeg approach, which requires MSbits at top
k = stbi__bit_reverse(a->code_buffer, 16);
@@ -2152,16 +3477,31 @@ stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
return z->value[b];
}
-static int stbi__zexpand(stbi__zbuf *z, int n) // need to make room for n bytes
+stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
+{
+ int b,s;
+ if (a->num_bits < 16) stbi__fill_bits(a);
+ b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
+ if (b) {
+ s = b >> 9;
+ a->code_buffer >>= s;
+ a->num_bits -= s;
+ return b & 511;
+ }
+ return stbi__zhuffman_decode_slowpath(a, z);
+}
+
+static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes
{
char *q;
int cur, limit;
+ z->zout = zout;
if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
cur = (int) (z->zout - z->zout_start);
limit = (int) (z->zout_end - z->zout_start);
while (cur + n > limit)
limit *= 2;
- q = (char *) realloc(z->zout_start, limit);
+ q = (char *) STBI_REALLOC(z->zout_start, limit);
if (q == NULL) return stbi__err("outofmem", "Out of memory");
z->zout_start = q;
z->zout = q + cur;
@@ -2174,7 +3514,7 @@ static int stbi__zlength_base[31] = {
15,17,19,23,27,31,35,43,51,59,
67,83,99,115,131,163,195,227,258,0,0 };
-static int stbi__zlength_extra[31]=
+static int stbi__zlength_extra[31]=
{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
static int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
@@ -2185,16 +3525,23 @@ static int stbi__zdist_extra[32] =
static int stbi__parse_huffman_block(stbi__zbuf *a)
{
+ char *zout = a->zout;
for(;;) {
int z = stbi__zhuffman_decode(a, &a->z_length);
if (z < 256) {
if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
- if (a->zout >= a->zout_end) if (!stbi__zexpand(a, 1)) return 0;
- *a->zout++ = (char) z;
+ if (zout >= a->zout_end) {
+ if (!stbi__zexpand(a, zout, 1)) return 0;
+ zout = a->zout;
+ }
+ *zout++ = (char) z;
} else {
stbi_uc *p;
int len,dist;
- if (z == 256) return 1;
+ if (z == 256) {
+ a->zout = zout;
+ return 1;
+ }
z -= 257;
len = stbi__zlength_base[z];
if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
@@ -2202,11 +3549,18 @@ static int stbi__parse_huffman_block(stbi__zbuf *a)
if (z < 0) return stbi__err("bad huffman code","Corrupt PNG");
dist = stbi__zdist_base[z];
if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
- if (a->zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
- if (a->zout + len > a->zout_end) if (!stbi__zexpand(a, len)) return 0;
- p = (stbi_uc *) (a->zout - dist);
- while (len--)
- *a->zout++ = *p++;
+ if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
+ if (zout + len > a->zout_end) {
+ if (!stbi__zexpand(a, zout, len)) return 0;
+ zout = a->zout;
+ }
+ p = (stbi_uc *) (zout - dist);
+ if (dist == 1) { // run of one byte; common in images.
+ stbi_uc v = *p;
+ do *zout++ = v; while (--len);
+ } else {
+ do *zout++ = *p++; while (--len);
+ }
}
}
}
@@ -2279,7 +3633,7 @@ static int stbi__parse_uncomperssed_block(stbi__zbuf *a)
if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
if (a->zout + len > a->zout_end)
- if (!stbi__zexpand(a, len)) return 0;
+ if (!stbi__zexpand(a, a->zout, len)) return 0;
memcpy(a->zout, a->zbuffer, len);
a->zbuffer += len;
a->zout += len;
@@ -2362,7 +3716,7 @@ STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int
if (outlen) *outlen = (int) (a.zout - a.zout_start);
return a.zout_start;
} else {
- free(a.zout_start);
+ STBI_FREE(a.zout_start);
return NULL;
}
}
@@ -2383,7 +3737,7 @@ STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, i
if (outlen) *outlen = (int) (a.zout - a.zout_start);
return a.zout_start;
} else {
- free(a.zout_start);
+ STBI_FREE(a.zout_start);
return NULL;
}
}
@@ -2410,7 +3764,7 @@ STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int
if (outlen) *outlen = (int) (a.zout - a.zout_start);
return a.zout_start;
} else {
- free(a.zout_start);
+ STBI_FREE(a.zout_start);
return NULL;
}
}
@@ -2425,6 +3779,7 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char
else
return -1;
}
+#endif
// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18
// simple implementation
@@ -2436,15 +3791,13 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char
// performance
// - uses stb_zlib, a PD zlib implementation with fast huffman decoding
-
+#ifndef STBI_NO_PNG
typedef struct
{
stbi__uint32 length;
stbi__uint32 type;
} stbi__pngchunk;
-#define PNG_TYPE(a,b,c,d) (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
-
static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
{
stbi__pngchunk c;
@@ -2470,13 +3823,23 @@ typedef struct
enum {
- STBI__F_none=0, STBI__F_sub=1, STBI__F_up=2, STBI__F_avg=3, STBI__F_paeth=4,
- STBI__F_avg_first, STBI__F_paeth_first
+ STBI__F_none=0,
+ STBI__F_sub=1,
+ STBI__F_up=2,
+ STBI__F_avg=3,
+ STBI__F_paeth=4,
+ // synthetic filters used for first scanline to avoid needing a dummy row of 0s
+ STBI__F_avg_first,
+ STBI__F_paeth_first
};
static stbi_uc first_row_filter[5] =
{
- STBI__F_none, STBI__F_sub, STBI__F_none, STBI__F_avg_first, STBI__F_paeth_first
+ STBI__F_none,
+ STBI__F_sub,
+ STBI__F_none,
+ STBI__F_avg_first,
+ STBI__F_paeth_first
};
static int stbi__paeth(int a, int b, int c)
@@ -2490,32 +3853,50 @@ static int stbi__paeth(int a, int b, int c)
return c;
}
-#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings
+static stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
// create the png data from post-deflated data
-static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y)
+static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
{
stbi__context *s = a->s;
stbi__uint32 i,j,stride = x*out_n;
+ stbi__uint32 img_len, img_width_bytes;
int k;
int img_n = s->img_n; // copy it into a local for later
+
STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
- a->out = (stbi_uc *) stbi__malloc(x * y * out_n);
+ a->out = (stbi_uc *) stbi__malloc(x * y * out_n); // extra bytes to write off the end into
if (!a->out) return stbi__err("outofmem", "Out of memory");
+
+ img_width_bytes = (((img_n * x * depth) + 7) >> 3);
+ img_len = (img_width_bytes + 1) * y;
if (s->img_x == x && s->img_y == y) {
- if (raw_len != (img_n * x + 1) * y) return stbi__err("not enough pixels","Corrupt PNG");
+ if (raw_len != img_len) return stbi__err("not enough pixels","Corrupt PNG");
} else { // interlaced:
- if (raw_len < (img_n * x + 1) * y) return stbi__err("not enough pixels","Corrupt PNG");
+ if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
}
+
for (j=0; j < y; ++j) {
stbi_uc *cur = a->out + stride*j;
stbi_uc *prior = cur - stride;
int filter = *raw++;
- if (filter > 4) return stbi__err("invalid filter","Corrupt PNG");
+ int filter_bytes = img_n;
+ int width = x;
+ if (filter > 4)
+ return stbi__err("invalid filter","Corrupt PNG");
+
+ if (depth < 8) {
+ STBI_ASSERT(img_width_bytes <= x);
+ cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
+ filter_bytes = 1;
+ width = img_width_bytes;
+ }
+
// if first row, use special filter that doesn't sample previous row
if (j == 0) filter = first_row_filter[filter];
- // handle first pixel explicitly
- for (k=0; k < img_n; ++k) {
+
+ // handle first byte explicitly
+ for (k=0; k < filter_bytes; ++k) {
switch (filter) {
case STBI__F_none : cur[k] = raw[k]; break;
case STBI__F_sub : cur[k] = raw[k]; break;
@@ -2526,26 +3907,37 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r
case STBI__F_paeth_first: cur[k] = raw[k]; break;
}
}
- if (img_n != out_n) cur[img_n] = 255;
- raw += img_n;
- cur += out_n;
- prior += out_n;
+
+ if (depth == 8) {
+ if (img_n != out_n)
+ cur[img_n] = 255; // first pixel
+ raw += img_n;
+ cur += out_n;
+ prior += out_n;
+ } else {
+ raw += 1;
+ cur += 1;
+ prior += 1;
+ }
+
// this is a little gross, so that we don't switch per-pixel or per-component
- if (img_n == out_n) {
+ if (depth < 8 || img_n == out_n) {
+ int nk = (width - 1)*img_n;
#define CASE(f) \
case f: \
- for (i=x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n) \
- for (k=0; k < img_n; ++k)
+ for (k=0; k < nk; ++k)
switch (filter) {
- CASE(STBI__F_none) cur[k] = raw[k]; break;
- CASE(STBI__F_sub) cur[k] = STBI__BYTECAST(raw[k] + cur[k-img_n]); break;
+ // "none" filter turns into a memcpy here; make that explicit.
+ case STBI__F_none: memcpy(cur, raw, nk); break;
+ CASE(STBI__F_sub) cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); break;
CASE(STBI__F_up) cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
- CASE(STBI__F_avg) cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-img_n])>>1)); break;
- CASE(STBI__F_paeth) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-img_n],prior[k],prior[k-img_n])); break;
- CASE(STBI__F_avg_first) cur[k] = STBI__BYTECAST(raw[k] + (cur[k-img_n] >> 1)); break;
- CASE(STBI__F_paeth_first) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-img_n],0,0)); break;
+ CASE(STBI__F_avg) cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); break;
+ CASE(STBI__F_paeth) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); break;
+ CASE(STBI__F_avg_first) cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); break;
+ CASE(STBI__F_paeth_first) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); break;
}
#undef CASE
+ raw += nk;
} else {
STBI_ASSERT(img_n+1 == out_n);
#define CASE(f) \
@@ -2564,15 +3956,90 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r
#undef CASE
}
}
+
+ // we make a separate pass to expand bits to pixels; for performance,
+ // this could run two scanlines behind the above code, so it won't
+ // intefere with filtering but will still be in the cache.
+ if (depth < 8) {
+ for (j=0; j < y; ++j) {
+ stbi_uc *cur = a->out + stride*j;
+ stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes;
+ // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
+ // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
+ stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
+
+ // note that the final byte might overshoot and write more data than desired.
+ // we can allocate enough data that this never writes out of memory, but it
+ // could also overwrite the next scanline. can it overwrite non-empty data
+ // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
+ // so we need to explicitly clamp the final ones
+
+ if (depth == 4) {
+ for (k=x*img_n; k >= 2; k-=2, ++in) {
+ *cur++ = scale * ((*in >> 4) );
+ *cur++ = scale * ((*in ) & 0x0f);
+ }
+ if (k > 0) *cur++ = scale * ((*in >> 4) );
+ } else if (depth == 2) {
+ for (k=x*img_n; k >= 4; k-=4, ++in) {
+ *cur++ = scale * ((*in >> 6) );
+ *cur++ = scale * ((*in >> 4) & 0x03);
+ *cur++ = scale * ((*in >> 2) & 0x03);
+ *cur++ = scale * ((*in ) & 0x03);
+ }
+ if (k > 0) *cur++ = scale * ((*in >> 6) );
+ if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
+ if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
+ } else if (depth == 1) {
+ for (k=x*img_n; k >= 8; k-=8, ++in) {
+ *cur++ = scale * ((*in >> 7) );
+ *cur++ = scale * ((*in >> 6) & 0x01);
+ *cur++ = scale * ((*in >> 5) & 0x01);
+ *cur++ = scale * ((*in >> 4) & 0x01);
+ *cur++ = scale * ((*in >> 3) & 0x01);
+ *cur++ = scale * ((*in >> 2) & 0x01);
+ *cur++ = scale * ((*in >> 1) & 0x01);
+ *cur++ = scale * ((*in ) & 0x01);
+ }
+ if (k > 0) *cur++ = scale * ((*in >> 7) );
+ if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
+ if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
+ if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
+ if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
+ if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
+ if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
+ }
+ if (img_n != out_n) {
+ // insert alpha = 255
+ stbi_uc *cur = a->out + stride*j;
+ int i;
+ if (img_n == 1) {
+ for (i=x-1; i >= 0; --i) {
+ cur[i*2+1] = 255;
+ cur[i*2+0] = cur[i];
+ }
+ } else {
+ assert(img_n == 3);
+ for (i=x-1; i >= 0; --i) {
+ cur[i*4+3] = 255;
+ cur[i*4+2] = cur[i*3+2];
+ cur[i*4+1] = cur[i*3+1];
+ cur[i*4+0] = cur[i*3+0];
+ }
+ }
+ }
+ }
+ }
+
return 1;
}
-static int stbi__create_png_image(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, int interlaced)
+static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
{
stbi_uc *final;
int p;
if (!interlaced)
- return stbi__create_png_image_raw(a, raw, raw_len, out_n, a->s->img_x, a->s->img_y);
+ return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
// de-interlacing
final = (stbi_uc *) stbi__malloc(a->s->img_x * a->s->img_y * out_n);
@@ -2586,17 +4053,22 @@ static int stbi__create_png_image(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_l
x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
if (x && y) {
- if (!stbi__create_png_image_raw(a, raw, raw_len, out_n, x, y)) {
- free(final);
+ stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
+ if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
+ STBI_FREE(final);
return 0;
}
- for (j=0; j < y; ++j)
- for (i=0; i < x; ++i)
- memcpy(final + (j*yspc[p]+yorig[p])*a->s->img_x*out_n + (i*xspc[p]+xorig[p])*out_n,
+ for (j=0; j < y; ++j) {
+ for (i=0; i < x; ++i) {
+ int out_y = j*yspc[p]+yorig[p];
+ int out_x = i*xspc[p]+xorig[p];
+ memcpy(final + out_y*a->s->img_x*out_n + out_x*out_n,
a->out + (j*x+i)*out_n, out_n);
- free(a->out);
- raw += (x*out_n+1)*y;
- raw_len -= (x*out_n+1)*y;
+ }
+ }
+ STBI_FREE(a->out);
+ image_data += img_len;
+ image_data_len -= img_len;
}
}
a->out = final;
@@ -2658,7 +4130,7 @@ static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int
p += 4;
}
}
- free(a->out);
+ STBI_FREE(a->out);
a->out = temp_out;
STBI_NOTUSED(len);
@@ -2706,7 +4178,7 @@ static void stbi__de_iphone(stbi__png *z)
} else {
p[0] = p[2];
p[2] = t;
- }
+ }
p += 4;
}
} else {
@@ -2721,12 +4193,14 @@ static void stbi__de_iphone(stbi__png *z)
}
}
+#define STBI__PNG_TYPE(a,b,c,d) (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
+
static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
{
stbi_uc palette[1024], pal_img_n=0;
stbi_uc has_trans=0, tc[3];
stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
- int first=1,k,interlace=0, is_iphone=0;
+ int first=1,k,interlace=0, color=0, depth=0, is_iphone=0;
stbi__context *s = z->s;
z->expanded = NULL;
@@ -2735,23 +4209,23 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
if (!stbi__check_png_header(s)) return 0;
- if (scan == SCAN_type) return 1;
+ if (scan == STBI__SCAN_type) return 1;
for (;;) {
stbi__pngchunk c = stbi__get_chunk_header(s);
switch (c.type) {
- case PNG_TYPE('C','g','B','I'):
+ case STBI__PNG_TYPE('C','g','B','I'):
is_iphone = 1;
stbi__skip(s, c.length);
break;
- case PNG_TYPE('I','H','D','R'): {
- int depth,color,comp,filter;
+ case STBI__PNG_TYPE('I','H','D','R'): {
+ int comp,filter;
if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
first = 0;
if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
- depth = stbi__get8(s); if (depth != 8) return stbi__err("8bit only","PNG not supported: 8-bit only");
+ depth = stbi__get8(s); if (depth != 1 && depth != 2 && depth != 4 && depth != 8) return stbi__err("1/2/4/8-bit only","PNG not supported: 1/2/4/8-bit only");
color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG");
if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG");
@@ -2761,7 +4235,7 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
if (!pal_img_n) {
s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
- if (scan == SCAN_header) return 1;
+ if (scan == STBI__SCAN_header) return 1;
} else {
// if paletted, then pal_n is our final components, and
// img_n is # components to decompress/filter.
@@ -2772,7 +4246,7 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
break;
}
- case PNG_TYPE('P','L','T','E'): {
+ case STBI__PNG_TYPE('P','L','T','E'): {
if (first) return stbi__err("first not IHDR", "Corrupt PNG");
if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
pal_len = c.length / 3;
@@ -2786,11 +4260,11 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
break;
}
- case PNG_TYPE('t','R','N','S'): {
+ case STBI__PNG_TYPE('t','R','N','S'): {
if (first) return stbi__err("first not IHDR", "Corrupt PNG");
if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
if (pal_img_n) {
- if (scan == SCAN_header) { s->img_n = 4; return 1; }
+ if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
pal_img_n = 4;
@@ -2801,21 +4275,21 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
has_trans = 1;
for (k=0; k < s->img_n; ++k)
- tc[k] = (stbi_uc) (stbi__get16be(s) & 255); // non 8-bit images will be larger
+ tc[k] = (stbi_uc) (stbi__get16be(s) & 255) * stbi__depth_scale_table[depth]; // non 8-bit images will be larger
}
break;
}
- case PNG_TYPE('I','D','A','T'): {
+ case STBI__PNG_TYPE('I','D','A','T'): {
if (first) return stbi__err("first not IHDR", "Corrupt PNG");
if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
- if (scan == SCAN_header) { s->img_n = pal_img_n; return 1; }
+ if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
if (ioff + c.length > idata_limit) {
stbi_uc *p;
if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
while (ioff + c.length > idata_limit)
idata_limit *= 2;
- p = (stbi_uc *) realloc(z->idata, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
+ p = (stbi_uc *) STBI_REALLOC(z->idata, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
z->idata = p;
}
if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
@@ -2823,19 +4297,22 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
break;
}
- case PNG_TYPE('I','E','N','D'): {
- stbi__uint32 raw_len;
+ case STBI__PNG_TYPE('I','E','N','D'): {
+ stbi__uint32 raw_len, bpl;
if (first) return stbi__err("first not IHDR", "Corrupt PNG");
- if (scan != SCAN_load) return 1;
+ if (scan != STBI__SCAN_load) return 1;
if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
- z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, 16384, (int *) &raw_len, !is_iphone);
+ // initial guess for decoded data size to avoid unnecessary reallocs
+ bpl = (s->img_x * depth + 7) / 8; // bytes per line, per component
+ raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
+ z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
if (z->expanded == NULL) return 0; // zlib should set error
- free(z->idata); z->idata = NULL;
+ STBI_FREE(z->idata); z->idata = NULL;
if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
s->img_out_n = s->img_n+1;
else
s->img_out_n = s->img_n;
- if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, interlace)) return 0;
+ if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, depth, color, interlace)) return 0;
if (has_trans)
if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
@@ -2848,7 +4325,7 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
return 0;
}
- free(z->expanded); z->expanded = NULL;
+ STBI_FREE(z->expanded); z->expanded = NULL;
return 1;
}
@@ -2878,7 +4355,7 @@ static unsigned char *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req
{
unsigned char *result=NULL;
if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
- if (stbi__parse_png_file(p, SCAN_load, req_comp)) {
+ if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
result = p->out;
p->out = NULL;
if (req_comp && req_comp != p->s->img_out_n) {
@@ -2890,9 +4367,9 @@ static unsigned char *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req
*y = p->s->img_y;
if (n) *n = p->s->img_out_n;
}
- free(p->out); p->out = NULL;
- free(p->expanded); p->expanded = NULL;
- free(p->idata); p->idata = NULL;
+ STBI_FREE(p->out); p->out = NULL;
+ STBI_FREE(p->expanded); p->expanded = NULL;
+ STBI_FREE(p->idata); p->idata = NULL;
return result;
}
@@ -2914,7 +4391,7 @@ static int stbi__png_test(stbi__context *s)
static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
{
- if (!stbi__parse_png_file(p, SCAN_header, 0)) {
+ if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
stbi__rewind( p->s );
return 0;
}
@@ -2930,8 +4407,11 @@ static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
p.s = s;
return stbi__png_info_raw(&p, x, y, comp);
}
+#endif
// Microsoft/Windows BMP image
+
+#ifndef STBI_NO_BMP
static int stbi__bmp_test_raw(stbi__context *s)
{
int r;
@@ -3094,7 +4574,7 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
if (!out) return stbi__errpuc("outofmem", "Out of memory");
if (bpp < 16) {
int z=0;
- if (psize == 0 || psize > 256) { free(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
+ if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
for (i=0; i < psize; ++i) {
pal[i][2] = stbi__get8(s);
pal[i][1] = stbi__get8(s);
@@ -3105,7 +4585,7 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
stbi__skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4));
if (bpp == 4) width = (s->img_x + 1) >> 1;
else if (bpp == 8) width = s->img_x;
- else { free(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
+ else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
pad = (-width)&3;
for (j=0; j < (int) s->img_y; ++j) {
for (i=0; i < (int) s->img_x; i += 2) {
@@ -3143,7 +4623,7 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
easy = 2;
}
if (!easy) {
- if (!mr || !mg || !mb) { free(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
+ if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
// right shift amt to put high bit in position #7
rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
@@ -3169,7 +4649,7 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
- if (target == 4) out[z++] = STBI__BYTECAST(a);
+ if (target == 4) out[z++] = STBI__BYTECAST(a);
}
}
stbi__skip(s, pad);
@@ -3196,10 +4676,11 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
if (comp) *comp = s->img_n;
return out;
}
+#endif
// Targa Truevision - TGA
// by Jonathan Dummer
-
+#ifndef STBI_NO_TGA
static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
{
int tga_w, tga_h, tga_comp;
@@ -3340,12 +4821,12 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int
// load the palette
tga_palette = (unsigned char*)stbi__malloc( tga_palette_len * tga_palette_bits / 8 );
if (!tga_palette) {
- free(tga_data);
+ STBI_FREE(tga_data);
return stbi__errpuc("outofmem", "Out of memory");
}
if (!stbi__getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8 )) {
- free(tga_data);
- free(tga_palette);
+ STBI_FREE(tga_data);
+ STBI_FREE(tga_palette);
return stbi__errpuc("bad palette", "Corrupt TGA");
}
}
@@ -3427,7 +4908,7 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int
// clear my palette, if I had one
if ( tga_palette != NULL )
{
- free( tga_palette );
+ STBI_FREE( tga_palette );
}
}
@@ -3455,10 +4936,12 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int
// OK, done
return tga_data;
}
+#endif
// *************************************************************************************************
// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
+#ifndef STBI_NO_PSD
static int stbi__psd_test(stbi__context *s)
{
int r = (stbi__get32be(s) == 0x38425053);
@@ -3493,7 +4976,7 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int
// Read the rows and columns of the image.
h = stbi__get32be(s);
w = stbi__get32be(s);
-
+
// Make sure the depth is 8 bits.
if (stbi__get16be(s) != 8)
return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 bit");
@@ -3535,7 +5018,7 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int
// Initialize the data to zero.
//memset( out, 0, pixelCount * 4 );
-
+
// Finally, the image data.
if (compression) {
// RLE as used by .PSD and .TIFF
@@ -3553,7 +5036,7 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int
// Read the RLE data by channel.
for (channel = 0; channel < 4; channel++) {
stbi_uc *p;
-
+
p = out+channel;
if (channel >= channelCount) {
// Fill this channel with default data.
@@ -3591,15 +5074,15 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int
}
}
}
-
+
} else {
// We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...)
// where each channel consists of an 8-bit value for each pixel in the image.
-
+
// Read the data by channel.
for (channel = 0; channel < 4; channel++) {
stbi_uc *p;
-
+
p = out + channel;
if (channel > channelCount) {
// Fill this channel with default data.
@@ -3620,9 +5103,10 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int
if (comp) *comp = channelCount;
*y = h;
*x = w;
-
+
return out;
}
+#endif
// *************************************************************************************************
// Softimage PIC loader
@@ -3631,6 +5115,7 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int
// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
+#ifndef STBI_NO_PIC
static int stbi__pic_is4(stbi__context *s,const char *str)
{
int i;
@@ -3818,7 +5303,7 @@ static stbi_uc *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int re
memset(result, 0xff, x*y*4);
if (!stbi__pic_load_core(s,x,y,comp, result)) {
- free(result);
+ STBI_FREE(result);
result=0;
}
*px = x;
@@ -3835,10 +5320,13 @@ static int stbi__pic_test(stbi__context *s)
stbi__rewind(s);
return r;
}
+#endif
// *************************************************************************************************
// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
-typedef struct
+
+#ifndef STBI_NO_GIF
+typedef struct
{
stbi__int16 prefix;
stbi_uc first;
@@ -3886,8 +5374,8 @@ static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], in
pal[i][2] = stbi__get8(s);
pal[i][1] = stbi__get8(s);
pal[i][0] = stbi__get8(s);
- pal[i][3] = transp ? 0 : 255;
- }
+ pal[i][3] = transp == i ? 0 : 255;
+ }
}
static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
@@ -3898,8 +5386,8 @@ static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_in
version = stbi__get8(s);
if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF");
- if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF");
-
+ if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF");
+
stbi__g_failure_reason = "";
g->w = stbi__get16le(s);
g->h = stbi__get16le(s);
@@ -3920,7 +5408,7 @@ static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_in
static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
{
- stbi__gif g;
+ stbi__gif g;
if (!stbi__gif_header(s, &g, comp, 1)) {
stbi__rewind( s );
return 0;
@@ -3940,7 +5428,7 @@ static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
stbi__out_gif_code(g, g->codes[code].prefix);
if (g->cur_y >= g->max_y) return;
-
+
p = &g->out[g->cur_x + g->cur_y];
c = &g->color_table[g->codes[code].suffix * 4];
@@ -3994,7 +5482,7 @@ static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
if (valid_bits < codesize) {
if (len == 0) {
len = stbi__get8(s); // start new block
- if (len == 0)
+ if (len == 0)
return g->out;
}
--len;
@@ -4039,7 +5527,7 @@ static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
} else {
return stbi__errpuc("illegal code in raster", "Corrupt GIF");
}
- }
+ }
}
}
@@ -4077,7 +5565,7 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i
memcpy(g->out, old_out, g->w*g->h*4);
}
}
-
+
for (;;) {
switch (stbi__get8(s)) {
case 0x2C: /* Image Descriptor */
@@ -4112,16 +5600,16 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i
if (g->lflags & 0x80) {
stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
- g->color_table = (stbi_uc *) g->lpal;
+ g->color_table = (stbi_uc *) g->lpal;
} else if (g->flags & 0x80) {
for (i=0; i < 256; ++i) // @OPTIMIZE: stbi__jpeg_reset only the previous transparent
- g->pal[i][3] = 255;
+ g->pal[i][3] = 255;
if (g->transparent >= 0 && (g->eflags & 0x01))
g->pal[g->transparent][3] = 0;
g->color_table = (stbi_uc *) g->pal;
} else
return stbi__errpuc("missing color table", "Corrupt GIF");
-
+
o = stbi__process_gif_raster(s, g);
if (o == NULL) return NULL;
@@ -4178,7 +5666,7 @@ static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
{
return stbi__gif_info_raw(s,x,y,comp);
}
-
+#endif
// *************************************************************************************************
// Radiance RGBE HDR loader
@@ -4267,7 +5755,7 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re
// Check identifier
if (strcmp(stbi__hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
return stbi__errpf("not HDR", "Corrupt HDR image");
-
+
// Parse header
for(;;) {
token = stbi__hdr_gettoken(s,buffer);
@@ -4328,14 +5816,14 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re
stbi__hdr_convert(hdr_data, rgbe, req_comp);
i = 1;
j = 0;
- free(scanline);
+ STBI_FREE(scanline);
goto main_decode_loop; // yes, this makes no sense
}
len <<= 8;
len |= stbi__get8(s);
- if (len != width) { free(hdr_data); free(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
+ if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
if (scanline == NULL) scanline = (stbi_uc *) stbi__malloc(width * 4);
-
+
for (k = 0; k < 4; ++k) {
i = 0;
while (i < width) {
@@ -4356,7 +5844,7 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re
for (i=0; i < width; ++i)
stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
}
- free(scanline);
+ STBI_FREE(scanline);
}
return hdr_data;
@@ -4402,6 +5890,7 @@ static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
}
#endif // STBI_NO_HDR
+#ifndef STBI_NO_BMP
static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
{
int hsz;
@@ -4429,7 +5918,9 @@ static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
*comp = stbi__get16le(s) / 8;
return 1;
}
+#endif
+#ifndef STBI_NO_PSD
static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
{
int channelCount;
@@ -4460,7 +5951,9 @@ static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
*comp = 4;
return 1;
}
+#endif
+#ifndef STBI_NO_PIC
static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
{
int act_comp=0,num_packets=0,chained;
@@ -4505,28 +5998,156 @@ static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
return 1;
}
+#endif
+
+// *************************************************************************************************
+// Portable Gray Map and Portable Pixel Map loader
+// by Ken Miller
+//
+// PGM: http://netpbm.sourceforge.net/doc/pgm.html
+// PPM: http://netpbm.sourceforge.net/doc/ppm.html
+//
+// Known limitations:
+// Does not support comments in the header section
+// Does not support ASCII image data (formats P2 and P3)
+// Does not support 16-bit-per-channel
+
+#ifndef STBI_NO_PNM
+
+static int stbi__pnm_test(stbi__context *s)
+{
+ char p, t;
+ p = (char) stbi__get8(s);
+ t = (char) stbi__get8(s);
+ if (p != 'P' || (t != '5' && t != '6')) {
+ stbi__rewind( s );
+ return 0;
+ }
+ return 1;
+}
+
+static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+{
+ stbi_uc *out;
+ if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
+ return 0;
+ *x = s->img_x;
+ *y = s->img_y;
+ *comp = s->img_n;
+
+ out = (stbi_uc *) stbi__malloc(s->img_n * s->img_x * s->img_y);
+ if (!out) return stbi__errpuc("outofmem", "Out of memory");
+ stbi__getn(s, out, s->img_n * s->img_x * s->img_y);
+
+ if (req_comp && req_comp != s->img_n) {
+ out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
+ if (out == NULL) return out; // stbi__convert_format frees input on failure
+ }
+ return out;
+}
+
+static int stbi__pnm_isspace(char c)
+{
+ return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
+}
+
+static void stbi__pnm_skip_whitespace(stbi__context *s, char *c)
+{
+ while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
+ *c = (char) stbi__get8(s);
+}
+
+static int stbi__pnm_isdigit(char c)
+{
+ return c >= '0' && c <= '9';
+}
+
+static int stbi__pnm_getinteger(stbi__context *s, char *c)
+{
+ int value = 0;
+
+ while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
+ value = value*10 + (*c - '0');
+ *c = (char) stbi__get8(s);
+ }
+
+ return value;
+}
+
+static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
+{
+ int maxv;
+ char c, p, t;
+
+ stbi__rewind( s );
+
+ // Get identifier
+ p = (char) stbi__get8(s);
+ t = (char) stbi__get8(s);
+ if (p != 'P' || (t != '5' && t != '6')) {
+ stbi__rewind( s );
+ return 0;
+ }
+
+ *comp = (t == '6') ? 3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm
+
+ c = (char) stbi__get8(s);
+ stbi__pnm_skip_whitespace(s, &c);
+
+ *x = stbi__pnm_getinteger(s, &c); // read width
+ stbi__pnm_skip_whitespace(s, &c);
+
+ *y = stbi__pnm_getinteger(s, &c); // read height
+ stbi__pnm_skip_whitespace(s, &c);
+
+ maxv = stbi__pnm_getinteger(s, &c); // read max value
+
+ if (maxv > 255)
+ return stbi__err("max value > 255", "PPM image not 8-bit");
+ else
+ return 1;
+}
+#endif
static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
{
- if (stbi__jpeg_info(s, x, y, comp))
- return 1;
- if (stbi__png_info(s, x, y, comp))
- return 1;
- if (stbi__gif_info(s, x, y, comp))
- return 1;
- if (stbi__bmp_info(s, x, y, comp))
- return 1;
- if (stbi__psd_info(s, x, y, comp))
- return 1;
- if (stbi__pic_info(s, x, y, comp))
- return 1;
+ #ifndef STBI_NO_JPEG
+ if (stbi__jpeg_info(s, x, y, comp)) return 1;
+ #endif
+
+ #ifndef STBI_NO_PNG
+ if (stbi__png_info(s, x, y, comp)) return 1;
+ #endif
+
+ #ifndef STBI_NO_GIF
+ if (stbi__gif_info(s, x, y, comp)) return 1;
+ #endif
+
+ #ifndef STBI_NO_BMP
+ if (stbi__bmp_info(s, x, y, comp)) return 1;
+ #endif
+
+ #ifndef STBI_NO_PSD
+ if (stbi__psd_info(s, x, y, comp)) return 1;
+ #endif
+
+ #ifndef STBI_NO_PIC
+ if (stbi__pic_info(s, x, y, comp)) return 1;
+ #endif
+
+ #ifndef STBI_NO_PNM
+ if (stbi__pnm_info(s, x, y, comp)) return 1;
+ #endif
+
#ifndef STBI_NO_HDR
- if (stbi__hdr_info(s, x, y, comp))
- return 1;
+ if (stbi__hdr_info(s, x, y, comp)) return 1;
#endif
+
// test tga last because it's a crappy test!
+ #ifndef STBI_NO_TGA
if (stbi__tga_info(s, x, y, comp))
return 1;
+ #endif
return stbi__err("unknown image type", "Image not of any known type, or corrupt");
}
@@ -4571,123 +6192,134 @@ STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int
/*
revision history:
- 1.46 (2014-08-26)
- fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
- 1.45 (2014-08-16)
- fix MSVC-ARM internal compiler error by wrapping malloc
- 1.44 (2014-08-07)
- various warning fixes from Ronny Chevalier
- 1.43 (2014-07-15)
- fix MSVC-only compiler problem in code changed in 1.42
- 1.42 (2014-07-09)
- don't define _CRT_SECURE_NO_WARNINGS (affects user code)
- fixes to stbi__cleanup_jpeg path
- added STBI_ASSERT to avoid requiring assert.h
- 1.41 (2014-06-25)
- fix search&replace from 1.36 that messed up comments/error messages
- 1.40 (2014-06-22)
- fix gcc struct-initialization warning
- 1.39 (2014-06-15)
- fix to TGA optimization when req_comp != number of components in TGA;
- fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
- add support for BMP version 5 (more ignored fields)
- 1.38 (2014-06-06)
- suppress MSVC warnings on integer casts truncating values
- fix accidental rename of 'skip' field of I/O
- 1.37 (2014-06-04)
- remove duplicate typedef
- 1.36 (2014-06-03)
- convert to header file single-file library
- if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
- 1.35 (2014-05-27)
- various warnings
- fix broken STBI_SIMD path
- fix bug where stbi_load_from_file no longer left file pointer in correct place
- fix broken non-easy path for 32-bit BMP (possibly never used)
- TGA optimization by Arseny Kapoulkine
- 1.34 (unknown)
- use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
- 1.33 (2011-07-14)
- make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
- 1.32 (2011-07-13)
- support for "info" function for all supported filetypes (SpartanJ)
- 1.31 (2011-06-20)
- a few more leak fixes, bug in PNG handling (SpartanJ)
- 1.30 (2011-06-11)
- added ability to load files via callbacks to accomidate custom input streams (Ben Wenger)
- removed deprecated format-specific test/load functions
- removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
- error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
- fix inefficiency in decoding 32-bit BMP (David Woo)
- 1.29 (2010-08-16)
- various warning fixes from Aurelien Pocheville
- 1.28 (2010-08-01)
- fix bug in GIF palette transparency (SpartanJ)
- 1.27 (2010-08-01)
- cast-to-stbi_uc to fix warnings
- 1.26 (2010-07-24)
- fix bug in file buffering for PNG reported by SpartanJ
- 1.25 (2010-07-17)
- refix trans_data warning (Won Chun)
- 1.24 (2010-07-12)
- perf improvements reading from files on platforms with lock-heavy fgetc()
- minor perf improvements for jpeg
- deprecated type-specific functions so we'll get feedback if they're needed
- attempt to fix trans_data warning (Won Chun)
- 1.23 fixed bug in iPhone support
- 1.22 (2010-07-10)
- removed image *writing* support
- stbi_info support from Jetro Lauha
- GIF support from Jean-Marc Lienher
- iPhone PNG-extensions from James Brown
- warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. Janez (U+017D)emva)
- 1.21 fix use of 'stbi_uc' in header (reported by jon blow)
- 1.20 added support for Softimage PIC, by Tom Seddon
- 1.19 bug in interlaced PNG corruption check (found by ryg)
+ 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
+ 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
+ progressive JPEG (stb)
+ PGM/PPM support (Ken Miller)
+ STBI_MALLOC,STBI_REALLOC,STBI_FREE
+ GIF bugfix -- seemingly never worked
+ STBI_NO_*, STBI_ONLY_*
+ 1.48 (2014-12-14) fix incorrectly-named assert()
+ 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
+ optimize PNG (ryg)
+ fix bug in interlaced PNG with user-specified channel count (stb)
+ 1.46 (2014-08-26)
+ fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
+ 1.45 (2014-08-16)
+ fix MSVC-ARM internal compiler error by wrapping malloc
+ 1.44 (2014-08-07)
+ various warning fixes from Ronny Chevalier
+ 1.43 (2014-07-15)
+ fix MSVC-only compiler problem in code changed in 1.42
+ 1.42 (2014-07-09)
+ don't define _CRT_SECURE_NO_WARNINGS (affects user code)
+ fixes to stbi__cleanup_jpeg path
+ added STBI_ASSERT to avoid requiring assert.h
+ 1.41 (2014-06-25)
+ fix search&replace from 1.36 that messed up comments/error messages
+ 1.40 (2014-06-22)
+ fix gcc struct-initialization warning
+ 1.39 (2014-06-15)
+ fix to TGA optimization when req_comp != number of components in TGA;
+ fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
+ add support for BMP version 5 (more ignored fields)
+ 1.38 (2014-06-06)
+ suppress MSVC warnings on integer casts truncating values
+ fix accidental rename of 'skip' field of I/O
+ 1.37 (2014-06-04)
+ remove duplicate typedef
+ 1.36 (2014-06-03)
+ convert to header file single-file library
+ if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
+ 1.35 (2014-05-27)
+ various warnings
+ fix broken STBI_SIMD path
+ fix bug where stbi_load_from_file no longer left file pointer in correct place
+ fix broken non-easy path for 32-bit BMP (possibly never used)
+ TGA optimization by Arseny Kapoulkine
+ 1.34 (unknown)
+ use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
+ 1.33 (2011-07-14)
+ make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
+ 1.32 (2011-07-13)
+ support for "info" function for all supported filetypes (SpartanJ)
+ 1.31 (2011-06-20)
+ a few more leak fixes, bug in PNG handling (SpartanJ)
+ 1.30 (2011-06-11)
+ added ability to load files via callbacks to accomidate custom input streams (Ben Wenger)
+ removed deprecated format-specific test/load functions
+ removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
+ error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
+ fix inefficiency in decoding 32-bit BMP (David Woo)
+ 1.29 (2010-08-16)
+ various warning fixes from Aurelien Pocheville
+ 1.28 (2010-08-01)
+ fix bug in GIF palette transparency (SpartanJ)
+ 1.27 (2010-08-01)
+ cast-to-stbi_uc to fix warnings
+ 1.26 (2010-07-24)
+ fix bug in file buffering for PNG reported by SpartanJ
+ 1.25 (2010-07-17)
+ refix trans_data warning (Won Chun)
+ 1.24 (2010-07-12)
+ perf improvements reading from files on platforms with lock-heavy fgetc()
+ minor perf improvements for jpeg
+ deprecated type-specific functions so we'll get feedback if they're needed
+ attempt to fix trans_data warning (Won Chun)
+ 1.23 fixed bug in iPhone support
+ 1.22 (2010-07-10)
+ removed image *writing* support
+ stbi_info support from Jetro Lauha
+ GIF support from Jean-Marc Lienher
+ iPhone PNG-extensions from James Brown
+ warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. Janez (U+017D)emva)
+ 1.21 fix use of 'stbi_uc' in header (reported by jon blow)
+ 1.20 added support for Softimage PIC, by Tom Seddon
+ 1.19 bug in interlaced PNG corruption check (found by ryg)
1.18 2008-08-02
- fix a threading bug (local mutable static)
- 1.17 support interlaced PNG
- 1.16 major bugfix - stbi__convert_format converted one too many pixels
- 1.15 initialize some fields for thread safety
- 1.14 fix threadsafe conversion bug
- header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
- 1.13 threadsafe
- 1.12 const qualifiers in the API
- 1.11 Support installable IDCT, colorspace conversion routines
- 1.10 Fixes for 64-bit (don't use "unsigned long")
- optimized upsampling by Fabian "ryg" Giesen
- 1.09 Fix format-conversion for PSD code (bad global variables!)
- 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz
- 1.07 attempt to fix C++ warning/errors again
- 1.06 attempt to fix C++ warning/errors again
- 1.05 fix TGA loading to return correct *comp and use good luminance calc
- 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free
- 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR
- 1.02 support for (subset of) HDR files, float interface for preferred access to them
- 1.01 fix bug: possible bug in handling right-side up bmps... not sure
- fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
- 1.00 interface to zlib that skips zlib header
- 0.99 correct handling of alpha in palette
- 0.98 TGA loader by lonesock; dynamically add loaders (untested)
- 0.97 jpeg errors on too large a file; also catch another malloc failure
- 0.96 fix detection of invalid v value - particleman@mollyrocket forum
- 0.95 during header scan, seek to markers in case of padding
- 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same
- 0.93 handle jpegtran output; verbose errors
- 0.92 read 4,8,16,24,32-bit BMP files of several formats
- 0.91 output 24-bit Windows 3.0 BMP files
- 0.90 fix a few more warnings; bump version number to approach 1.0
- 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd
- 0.60 fix compiling as c++
- 0.59 fix warnings: merge Dave Moore's -Wall fixes
- 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian
- 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
- 0.56 fix bug: zlib uncompressed mode len vs. nlen
- 0.55 fix bug: restart_interval not initialized to 0
- 0.54 allow NULL for 'int *comp'
- 0.53 fix bug in png 3->4; speedup png decoding
- 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments
- 0.51 obey req_comp requests, 1-component jpegs return as 1-component,
- on 'test' only check type, not whether we support this variant
- 0.50 first released version
+ fix a threading bug (local mutable static)
+ 1.17 support interlaced PNG
+ 1.16 major bugfix - stbi__convert_format converted one too many pixels
+ 1.15 initialize some fields for thread safety
+ 1.14 fix threadsafe conversion bug
+ header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
+ 1.13 threadsafe
+ 1.12 const qualifiers in the API
+ 1.11 Support installable IDCT, colorspace conversion routines
+ 1.10 Fixes for 64-bit (don't use "unsigned long")
+ optimized upsampling by Fabian "ryg" Giesen
+ 1.09 Fix format-conversion for PSD code (bad global variables!)
+ 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz
+ 1.07 attempt to fix C++ warning/errors again
+ 1.06 attempt to fix C++ warning/errors again
+ 1.05 fix TGA loading to return correct *comp and use good luminance calc
+ 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free
+ 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR
+ 1.02 support for (subset of) HDR files, float interface for preferred access to them
+ 1.01 fix bug: possible bug in handling right-side up bmps... not sure
+ fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
+ 1.00 interface to zlib that skips zlib header
+ 0.99 correct handling of alpha in palette
+ 0.98 TGA loader by lonesock; dynamically add loaders (untested)
+ 0.97 jpeg errors on too large a file; also catch another malloc failure
+ 0.96 fix detection of invalid v value - particleman@mollyrocket forum
+ 0.95 during header scan, seek to markers in case of padding
+ 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same
+ 0.93 handle jpegtran output; verbose errors
+ 0.92 read 4,8,16,24,32-bit BMP files of several formats
+ 0.91 output 24-bit Windows 3.0 BMP files
+ 0.90 fix a few more warnings; bump version number to approach 1.0
+ 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd
+ 0.60 fix compiling as c++
+ 0.59 fix warnings: merge Dave Moore's -Wall fixes
+ 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian
+ 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
+ 0.56 fix bug: zlib uncompressed mode len vs. nlen
+ 0.55 fix bug: restart_interval not initialized to 0
+ 0.54 allow NULL for 'int *comp'
+ 0.53 fix bug in png 3->4; speedup png decoding
+ 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments
+ 0.51 obey req_comp requests, 1-component jpegs return as 1-component,
+ on 'test' only check type, not whether we support this variant
+ 0.50 first released version
*/
diff --git a/src/text.c b/src/text.c
index 8b7e5b12..94481857 100644
--- a/src/text.c
+++ b/src/text.c
@@ -63,7 +63,6 @@ static SpriteFont defaultFont; // Default font provided by raylib
//----------------------------------------------------------------------------------
static bool PixelIsMagenta(Color p); // Check if a pixel is magenta
static int ParseImageData(Color *imgDataPixel, int imgWidth, int imgHeight, Character **charSet); // Parse image pixel data to obtain character set measures
-static int GetNextPOT(int num); // Calculate next power-of-two value for a given value
static SpriteFont LoadRBMF(const char *fileName); // Load a rBMF font file (raylib BitMap Font)
extern void LoadDefaultFont(void);
@@ -135,7 +134,7 @@ extern void LoadDefaultFont(void)
if (counter > 256) counter = 0; // Security check...
}
- defaultFont.texture = CreateTexture(image, false); // Convert loaded image to OpenGL texture
+ defaultFont.texture = LoadTextureFromImage(image, false); // Convert loaded image to OpenGL texture
UnloadImage(image);
// Reconstruct charSet using charsWidth[], charsHeight, charsDivisor, numChars
@@ -168,7 +167,7 @@ extern void LoadDefaultFont(void)
else currentPosX = testPosX;
}
- TraceLog(INFO, "Default font loaded successfully");
+ TraceLog(INFO, "[TEX ID %i] Default font loaded successfully", defaultFont.texture.id);
}
extern void UnloadDefaultFont(void)
@@ -195,9 +194,10 @@ SpriteFont LoadSpriteFont(const char *fileName)
Image image = LoadImage(fileName);
// At this point we have a pixel array with all the data...
-
- TraceLog(INFO, "[%s] SpriteFont image loaded: %i x %i", fileName, image.width, image.height);
-
+
+#if defined(PLATFORM_RPI) || defined(PLATFORM_WEB)
+ ConvertToPOT(&image, MAGENTA);
+#endif
// Process bitmap Font pixel data to get measures (Character array)
// spriteFont.charSet data is filled inside the function and memory is allocated!
int numChars = ParseImageData(image.pixels, image.width, image.height, &spriteFont.charSet);
@@ -207,40 +207,8 @@ SpriteFont LoadSpriteFont(const char *fileName)
spriteFont.numChars = numChars;
- // Convert image font to POT image before conversion to texture
- // NOTE: Not required, we skip this step
-/*
- // Just add the required amount of pixels at the right and bottom sides of image...
- int potWidth = GetNextPOT(image.width);
- int potHeight = GetNextPOT(image.height);
-
- // Check if POT texture generation is required (if texture is not already POT)
- if ((potWidth != image.width) || (potHeight != image.height))
- {
- Color *imgDataPixelPOT = NULL;
-
- // Generate POT array from NPOT data
- imgDataPixelPOT = (Color *)malloc(potWidth * potHeight * sizeof(Color));
-
- for (int j = 0; j < potHeight; j++)
- {
- for (int i = 0; i < potWidth; i++)
- {
- if ((j < image.height) && (i < image.width)) imgDataPixelPOT[j*potWidth + i] = image.pixels[j*image.width + i];
- else imgDataPixelPOT[j*potWidth + i] = MAGENTA;
- }
- }
-
- TraceLog(WARNING, "SpriteFont texture converted to POT: %ix%i", potWidth, potHeight);
-
- free(image.pixels);
-
- image.pixels = imgDataPixelPOT;
- image.width = potWidth;
- image.height = potHeight;
- }
-*/
- spriteFont.texture = CreateTexture(image, false); // Convert loaded image to OpenGL texture
+ spriteFont.texture = LoadTextureFromImage(image, false); // Convert loaded image to OpenGL texture
+
UnloadImage(image);
}
@@ -475,23 +443,6 @@ static int ParseImageData(Color *imgDataPixel, int imgWidth, int imgHeight, Char
return index;
}
-// Calculate next power-of-two value for a given num
-static int GetNextPOT(int num)
-{
- if (num != 0)
- {
- num--;
- num |= (num >> 1); // Or first 2 bits
- num |= (num >> 2); // Or next 2 bits
- num |= (num >> 4); // Or next 4 bits
- num |= (num >> 8); // Or next 8 bits
- num |= (num >> 16); // Or next 16 bits
- num++;
- }
-
- return num;
-}
-
// Load a rBMF font file (raylib BitMap Font)
static SpriteFont LoadRBMF(const char *fileName)
{
@@ -517,91 +468,96 @@ static SpriteFont LoadRBMF(const char *fileName)
Image image;
rbmfInfoHeader rbmfHeader;
- unsigned int *rbmfFileData;
- unsigned char *rbmfCharWidthData;
+ unsigned int *rbmfFileData = NULL;
+ unsigned char *rbmfCharWidthData = NULL;
int charsDivisor = 1; // Every char is separated from the consecutive by a 1 pixel divisor, horizontally and vertically
FILE *rbmfFile = fopen(fileName, "rb"); // Define a pointer to bitmap file and open it in read-binary mode
- // TODO: Check if file could be loaded! (rbmfFile == NULL)?
+ if (rbmfFile == NULL)
+ {
+ TraceLog(WARNING, "[%s] rBMF font file could not be opened", fileName);
+ }
+ else
+ {
+ fread(&rbmfHeader, sizeof(rbmfInfoHeader), 1, rbmfFile);
- fread(&rbmfHeader, sizeof(rbmfInfoHeader), 1, rbmfFile);
+ TraceLog(INFO, "[%s] Loading rBMF file, size: %ix%i, numChars: %i, charHeight: %i", fileName, rbmfHeader.imgWidth, rbmfHeader.imgHeight, rbmfHeader.numChars, rbmfHeader.charHeight);
- TraceLog(INFO, "[%s] Loading rBMF file, size: %ix%i, numChars: %i, charHeight: %i", fileName, rbmfHeader.imgWidth, rbmfHeader.imgHeight, rbmfHeader.numChars, rbmfHeader.charHeight);
+ spriteFont.numChars = (int)rbmfHeader.numChars;
- spriteFont.numChars = (int)rbmfHeader.numChars;
+ image.width = (int)rbmfHeader.imgWidth;
+ image.height = (int)rbmfHeader.imgHeight;
- image.width = (int)rbmfHeader.imgWidth;
- image.height = (int)rbmfHeader.imgHeight;
+ int numPixelBits = rbmfHeader.imgWidth * rbmfHeader.imgHeight / 32;
- int numPixelBits = rbmfHeader.imgWidth * rbmfHeader.imgHeight / 32;
+ rbmfFileData = (unsigned int *)malloc(numPixelBits * sizeof(unsigned int));
- rbmfFileData = (unsigned int *)malloc(numPixelBits * sizeof(unsigned int));
+ for(int i = 0; i < numPixelBits; i++) fread(&rbmfFileData[i], sizeof(unsigned int), 1, rbmfFile);
- for(int i = 0; i < numPixelBits; i++) fread(&rbmfFileData[i], sizeof(unsigned int), 1, rbmfFile);
+ rbmfCharWidthData = (unsigned char *)malloc(spriteFont.numChars * sizeof(unsigned char));
- rbmfCharWidthData = (unsigned char *)malloc(spriteFont.numChars * sizeof(unsigned char));
+ for(int i = 0; i < spriteFont.numChars; i++) fread(&rbmfCharWidthData[i], sizeof(unsigned char), 1, rbmfFile);
- for(int i = 0; i < spriteFont.numChars; i++) fread(&rbmfCharWidthData[i], sizeof(unsigned char), 1, rbmfFile);
+ // Re-construct image from rbmfFileData
+ //-----------------------------------------
+ image.pixels = (Color *)malloc(image.width * image.height * sizeof(Color));
- // Re-construct image from rbmfFileData
- //-----------------------------------------
- image.pixels = (Color *)malloc(image.width * image.height * sizeof(Color));
+ for (int i = 0; i < image.width * image.height; i++) image.pixels[i] = BLANK; // Initialize array
- for (int i = 0; i < image.width * image.height; i++) image.pixels[i] = BLANK; // Initialize array
+ int counter = 0; // Font data elements counter
- int counter = 0; // Font data elements counter
-
- // Fill image data (convert from bit to pixel!)
- for (int i = 0; i < image.width * image.height; i += 32)
- {
- for (int j = 31; j >= 0; j--)
+ // Fill image data (convert from bit to pixel!)
+ for (int i = 0; i < image.width * image.height; i += 32)
{
- if (BIT_CHECK(rbmfFileData[counter], j)) image.pixels[i+j] = WHITE;
- }
+ for (int j = 31; j >= 0; j--)
+ {
+ if (BIT_CHECK(rbmfFileData[counter], j)) image.pixels[i+j] = WHITE;
+ }
- counter++;
- }
+ counter++;
+ }
- TraceLog(INFO, "[%s] Image reconstructed correctly, now converting it to texture", fileName);
+ TraceLog(INFO, "[%s] Image reconstructed correctly, now converting it to texture", fileName);
- spriteFont.texture = CreateTexture(image, false);
- UnloadImage(image); // Unload image data
+ spriteFont.texture = LoadTextureFromImage(image, false);
+ UnloadImage(image); // Unload image data
- TraceLog(INFO, "[%s] Starting charSet reconstruction", fileName);
+ //TraceLog(INFO, "[%s] Starting charSet reconstruction", fileName);
- // Reconstruct charSet using rbmfCharWidthData, rbmfHeader.charHeight, charsDivisor, rbmfHeader.numChars
- spriteFont.charSet = (Character *)malloc(spriteFont.numChars * sizeof(Character)); // Allocate space for our character data
+ // Reconstruct charSet using rbmfCharWidthData, rbmfHeader.charHeight, charsDivisor, rbmfHeader.numChars
+ spriteFont.charSet = (Character *)malloc(spriteFont.numChars * sizeof(Character)); // Allocate space for our character data
- int currentLine = 0;
- int currentPosX = charsDivisor;
- int testPosX = charsDivisor;
+ int currentLine = 0;
+ int currentPosX = charsDivisor;
+ int testPosX = charsDivisor;
- for (int i = 0; i < spriteFont.numChars; i++)
- {
- spriteFont.charSet[i].value = (int)rbmfHeader.firstChar + i;
- spriteFont.charSet[i].x = currentPosX;
- spriteFont.charSet[i].y = charsDivisor + currentLine * ((int)rbmfHeader.charHeight + charsDivisor);
- spriteFont.charSet[i].w = (int)rbmfCharWidthData[i];
- spriteFont.charSet[i].h = (int)rbmfHeader.charHeight;
+ for (int i = 0; i < spriteFont.numChars; i++)
+ {
+ spriteFont.charSet[i].value = (int)rbmfHeader.firstChar + i;
+ spriteFont.charSet[i].x = currentPosX;
+ spriteFont.charSet[i].y = charsDivisor + currentLine * ((int)rbmfHeader.charHeight + charsDivisor);
+ spriteFont.charSet[i].w = (int)rbmfCharWidthData[i];
+ spriteFont.charSet[i].h = (int)rbmfHeader.charHeight;
- testPosX += (spriteFont.charSet[i].w + charsDivisor);
+ testPosX += (spriteFont.charSet[i].w + charsDivisor);
- if (testPosX > spriteFont.texture.width)
- {
- currentLine++;
- currentPosX = 2 * charsDivisor + (int)rbmfCharWidthData[i];
- testPosX = currentPosX;
+ if (testPosX > spriteFont.texture.width)
+ {
+ currentLine++;
+ currentPosX = 2 * charsDivisor + (int)rbmfCharWidthData[i];
+ testPosX = currentPosX;
- spriteFont.charSet[i].x = charsDivisor;
- spriteFont.charSet[i].y = charsDivisor + currentLine * (rbmfHeader.charHeight + charsDivisor);
+ spriteFont.charSet[i].x = charsDivisor;
+ spriteFont.charSet[i].y = charsDivisor + currentLine * (rbmfHeader.charHeight + charsDivisor);
+ }
+ else currentPosX = testPosX;
}
- else currentPosX = testPosX;
- }
-
- TraceLog(INFO, "[%s] rBMF file loaded correctly as SpriteFont", fileName);
+ TraceLog(INFO, "[%s] rBMF file loaded correctly as SpriteFont", fileName);
+ }
+
fclose(rbmfFile);
free(rbmfFileData); // Now we can free loaded data from RAM memory
diff --git a/src/textures.c b/src/textures.c
index 929cba78..f701c380 100644
--- a/src/textures.c
+++ b/src/textures.c
@@ -126,9 +126,9 @@ Image LoadImage(const char *fileName)
image.width = imgWidth;
image.height = imgHeight;
- TraceLog(INFO, "[%s] Image loaded successfully", fileName);
+ TraceLog(INFO, "[%s] Image loaded successfully (%ix%i)", fileName, image.width, image.height);
}
- else TraceLog(WARNING, "[%s] Image could not be loaded, file format not recognized", fileName);
+ else TraceLog(WARNING, "[%s] Image could not be loaded, file not recognized", fileName);
}
else if (strcmp(GetExtension(fileName),"dds") == 0)
{
@@ -175,8 +175,6 @@ Image LoadImage(const char *fileName)
// Load an image from rRES file (raylib Resource)
Image LoadImageFromRES(const char *rresName, int resId)
{
- // TODO: rresName could be directly a char array with all the data! --> support it! :P
-
Image image;
bool found = false;
@@ -189,7 +187,10 @@ Image LoadImageFromRES(const char *rresName, int resId)
FILE *rresFile = fopen(rresName, "rb");
- if (!rresFile) TraceLog(WARNING, "[%s] Could not open raylib resource file", rresName);
+ if (rresFile == NULL)
+ {
+ TraceLog(WARNING, "[%s] rRES raylib resource file could not be opened", rresName);
+ }
else
{
// Read rres file (basic file check - id)
@@ -295,6 +296,11 @@ Image LoadImageFromRES(const char *rresName, int resId)
Texture2D LoadTexture(const char *fileName)
{
Texture2D texture;
+
+ // Init texture to default values
+ texture.id = 0;
+ texture.width = 0;
+ texture.height = 0;
if (strcmp(GetExtension(fileName),"dds") == 0)
{
@@ -334,10 +340,13 @@ Texture2D LoadTexture(const char *fileName)
else
{
Image image = LoadImage(fileName);
-
+
if (image.pixels != NULL)
{
- texture = CreateTexture(image, false);
+#if defined(PLATFORM_RPI) || defined(PLATFORM_WEB)
+ ConvertToPOT(&image, BLANK);
+#endif
+ texture = LoadTextureFromImage(image, false);
UnloadImage(image);
}
}
@@ -345,13 +354,66 @@ Texture2D LoadTexture(const char *fileName)
return texture;
}
+// Load a texture from image data
+// NOTE: image is not unloaded, it must be done manually
+Texture2D LoadTextureFromImage(Image image, bool genMipmaps)
+{
+ Texture2D texture;
+
+ // Init texture to default values
+ texture.id = 0;
+ texture.width = 0;
+ texture.height = 0;
+
+ if ((image.pixels != NULL) && (image.width > 0) && (image.height > 0))
+ {
+ unsigned char *imgData = malloc(image.width * image.height * 4);
+
+ int j = 0;
+
+ for (int i = 0; i < image.width * image.height * 4; i += 4)
+ {
+ imgData[i] = image.pixels[j].r;
+ imgData[i+1] = image.pixels[j].g;
+ imgData[i+2] = image.pixels[j].b;
+ imgData[i+3] = image.pixels[j].a;
+
+ j++;
+ }
+
+ // NOTE: rlglLoadTexture() can generate mipmaps (POT image required)
+ texture.id = rlglLoadTexture(imgData, image.width, image.height, genMipmaps);
+
+ texture.width = image.width;
+ texture.height = image.height;
+
+ free(imgData);
+ }
+ else TraceLog(WARNING, "Texture could not be loaded, image data is not valid");
+
+ return texture;
+}
+
+// [DEPRECATED] Load a texture from image data
+// NOTE: Use LoadTextureFromImage() instead
+Texture2D CreateTexture(Image image, bool genMipmaps)
+{
+ Texture2D texture;
+
+ texture = LoadTextureFromImage(image, genMipmaps);
+
+ TraceLog(INFO, "Created texture id: %i", texture.id);
+
+ return texture;
+}
+
// Load an image as texture from rRES file (raylib Resource)
Texture2D LoadTextureFromRES(const char *rresName, int resId)
{
Texture2D texture;
Image image = LoadImageFromRES(rresName, resId);
- texture = CreateTexture(image, false);
+ texture = LoadTextureFromImage(image, false);
UnloadImage(image);
return texture;
@@ -369,6 +431,41 @@ void UnloadTexture(Texture2D texture)
rlDeleteTextures(texture.id);
}
+// Convert image to POT (power-of-two)
+// NOTE: Requirement on OpenGL ES 2.0 (RPI, HTML5)
+void ConvertToPOT(Image *image, Color fillColor)
+{
+ // Just add the required amount of pixels at the right and bottom sides of image...
+ int potWidth = GetNextPOT(image->width);
+ int potHeight = GetNextPOT(image->height);
+
+ // Check if POT texture generation is required (if texture is not already POT)
+ if ((potWidth != image->width) || (potHeight != image->height))
+ {
+ Color *imgDataPixelPOT = NULL;
+
+ // Generate POT array from NPOT data
+ imgDataPixelPOT = (Color *)malloc(potWidth * potHeight * sizeof(Color));
+
+ for (int j = 0; j < potHeight; j++)
+ {
+ for (int i = 0; i < potWidth; i++)
+ {
+ if ((j < image->height) && (i < image->width)) imgDataPixelPOT[j*potWidth + i] = image->pixels[j*image->width + i];
+ else imgDataPixelPOT[j*potWidth + i] = fillColor;
+ }
+ }
+
+ TraceLog(WARNING, "Image converted to POT: (%ix%i) -> (%ix%i)", image->width, image->height, potWidth, potHeight);
+
+ free(image->pixels);
+
+ image->pixels = imgDataPixelPOT;
+ image->width = potWidth;
+ image->height = potHeight;
+ }
+}
+
// Draw a Texture2D
void DrawTexture(Texture2D texture, int posX, int posY, Color tint)
{
@@ -436,49 +533,13 @@ void DrawTexturePro(Texture2D texture, Rectangle sourceRec, Rectangle destRec, V
rlDisableTexture();
}
-// Create a texture from an image
-// NOTE: image is not unloaded, iot must be done manually
-Texture2D CreateTexture(Image image, bool genMipmaps)
-{
- Texture2D texture;
-
- // Init texture to default values
- texture.id = 0;
- texture.width = 0;
- texture.height = 0;
-
- if (image.pixels != NULL)
- {
- unsigned char *imgData = malloc(image.width * image.height * 4);
-
- int j = 0;
-
- for (int i = 0; i < image.width * image.height * 4; i += 4)
- {
- imgData[i] = image.pixels[j].r;
- imgData[i+1] = image.pixels[j].g;
- imgData[i+2] = image.pixels[j].b;
- imgData[i+3] = image.pixels[j].a;
-
- j++;
- }
-
- // NOTE: rlglLoadTexture() can generate mipmaps (POT image required)
- texture.id = rlglLoadTexture(imgData, image.width, image.height, genMipmaps);
-
- texture.width = image.width;
- texture.height = image.height;
-
- free(imgData);
- }
- else TraceLog(WARNING, "Texture could not be created, image data is not valid");
-
- return texture;
-}
+//----------------------------------------------------------------------------------
+// Module specific Functions Definition
+//----------------------------------------------------------------------------------
// Loading DDS image data (compressed or uncompressed)
// NOTE: Compressed data loading not supported on OpenGL 1.1
-ImageEx LoadDDS(const char *fileName)
+static ImageEx LoadDDS(const char *fileName)
{
#define FOURCC_DXT1 0x31545844 // Equivalent to "DXT1" in ASCII
#define FOURCC_DXT3 0x33545844 // Equivalent to "DXT3" in ASCII
@@ -528,12 +589,18 @@ ImageEx LoadDDS(const char *fileName)
ImageEx image;
ddsHeader header;
+
+ image.data = NULL;
+ image.width = 0;
+ image.height = 0;
+ image.mipmaps = 0;
+ image.compFormat = 0;
FILE *ddsFile = fopen(fileName, "rb");
if (ddsFile == NULL)
{
- TraceLog(WARNING, "DDS File could not be opened");
+ TraceLog(WARNING, "[%s] DDS image file could not be opened", fileName);
}
else
{
@@ -544,7 +611,7 @@ ImageEx LoadDDS(const char *fileName)
if (strncmp(filecode, "DDS ", 4) != 0)
{
- TraceLog(WARNING, "DDS File does not seem to be valid");
+ TraceLog(WARNING, "[%s] DDS file does not seem to be a valid image", fileName);
fclose(ddsFile);
}
else
@@ -636,7 +703,7 @@ ImageEx LoadDDS(const char *fileName)
// Loading PKM image data (ETC1/ETC2 compression)
// NOTE: KTX is the standard Khronos Group compression format (ETC1/ETC2, mipmaps)
// PKM is a much simpler file format used mainly to contain a single ETC1/ETC2 compressed image (no mipmaps)
-ImageEx LoadPKM(const char *fileName)
+static ImageEx LoadPKM(const char *fileName)
{
// If OpenGL ES 2.0. the following format could be supported (ETC1):
//GL_ETC1_RGB8_OES
@@ -679,7 +746,7 @@ ImageEx LoadPKM(const char *fileName)
if (pkmFile == NULL)
{
- TraceLog(WARNING, "[%s] PKM File could not be opened", fileName);
+ TraceLog(WARNING, "[%s] PKM image file could not be opened", fileName);
}
else
{
@@ -690,7 +757,7 @@ ImageEx LoadPKM(const char *fileName)
if (strncmp(filecode, "PKM ", 4) != 0)
{
- TraceLog(WARNING, "[%s] PKM File does not seem to be valid", fileName);
+ TraceLog(WARNING, "[%s] PKM file does not seem to be a valid image", fileName);
fclose(pkmFile);
}
else
diff --git a/src/utils.c b/src/utils.c
index b5112111..c3c20b47 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -133,18 +133,25 @@ void WriteBitmap(const char *fileName, unsigned char *imgData, int width, int he
FILE *bmpFile = fopen(fileName, "wb"); // Define a pointer to bitmap file and open it in write-binary mode
- // NOTE: fwrite parameters are: data pointer, size in bytes of each element to be written, number of elements, file-to-write pointer
- fwrite(bmpFileHeader, sizeof(unsigned char), 14, bmpFile); // Write BMP file header data
- fwrite(bmpInfoHeader, sizeof(unsigned char), 40, bmpFile); // Write BMP info header data
-
- // Write pixel data to file
- for (int y = 0; y < height ; y++)
+ if (bmpFile == NULL)
+ {
+ TraceLog(WARNING, "[%s] BMP file could not be created", fileName);
+ }
+ else
{
- for (int x = 0; x < width; x++)
+ // NOTE: fwrite parameters are: data pointer, size in bytes of each element to be written, number of elements, file-to-write pointer
+ fwrite(bmpFileHeader, sizeof(unsigned char), 14, bmpFile); // Write BMP file header data
+ fwrite(bmpInfoHeader, sizeof(unsigned char), 40, bmpFile); // Write BMP info header data
+
+ // Write pixel data to file
+ for (int y = 0; y < height ; y++)
{
- fputc(imgData[(x*4)+2 + (y*width*4)], bmpFile);
- fputc(imgData[(x*4)+1 + (y*width*4)], bmpFile);
- fputc(imgData[(x*4) + (y*width*4)], bmpFile);
+ for (int x = 0; x < width; x++)
+ {
+ fputc(imgData[(x*4)+2 + (y*width*4)], bmpFile);
+ fputc(imgData[(x*4)+1 + (y*width*4)], bmpFile);
+ fputc(imgData[(x*4) + (y*width*4)], bmpFile);
+ }
}
}
@@ -157,7 +164,9 @@ void WritePNG(const char *fileName, unsigned char *imgData, int width, int heigh
{
stbi_write_png(fileName, width, height, 4, imgData, width*4); // It WORKS!!!
}
+#endif
+#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_RPI) || defined(PLATFORM_WEB)
// Outputs a trace log message (INFO, ERROR, WARNING)
// NOTE: If a file has been init, output log is written there
void TraceLog(int msgType, const char *text, ...)
@@ -262,6 +271,23 @@ const char *GetExtension(const char *fileName)
return (dot + 1);
}
+// Calculate next power-of-two value for a given num
+int GetNextPOT(int num)
+{
+ if (num != 0)
+ {
+ num--;
+ num |= (num >> 1); // Or first 2 bits
+ num |= (num >> 2); // Or next 2 bits
+ num |= (num >> 4); // Or next 4 bits
+ num |= (num >> 8); // Or next 8 bits
+ num |= (num >> 16); // Or next 16 bits
+ num++;
+ }
+
+ return num;
+}
+
//----------------------------------------------------------------------------------
// Module specific Functions Definition
//----------------------------------------------------------------------------------
diff --git a/src/utils.h b/src/utils.h
index 784c7926..882aebf6 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -77,6 +77,7 @@ void WritePNG(const char *fileName, unsigned char *imgData, int width, int heigh
void TraceLog(int msgType, const char *text, ...); // Outputs a trace log message
const char *GetExtension(const char *fileName); // Returns extension of a filename
+int GetNextPOT(int num); // Calculate next power-of-two value for a given num
#if defined(PLATFORM_ANDROID)
void InitAssetManager(AAssetManager *manager); // Initialize asset manager from android app