Merge pull request #244 from taril42/master

Json parser optimizations for spine-c.
This commit is contained in:
Nathan Sweet 2014-07-24 02:54:39 +02:00
commit 0d10ea46a0
2 changed files with 151 additions and 68 deletions

View File

@ -23,11 +23,28 @@
/* Json */ /* Json */
/* JSON parser in C. */ /* JSON parser in C. */
#ifndef _DEFAULT_SOURCE
/* Bring strings.h definitions into string.h, where appropriate */
#define _DEFAULT_SOURCE
#endif
#ifndef _BSD_SOURCE
/* Bring strings.h definitions into string.h, where appropriate */
#define _BSD_SOURCE
#endif
#include "Json.h" #include "Json.h"
#include <stdio.h> #include <stdio.h>
#include <ctype.h> #include <ctype.h>
#include <stdlib.h> /* strtod (C89), strtof (C99) */
#include <string.h> /* strcasecmp (4.4BSD - compatibility), _stricmp (_WIN32) */
#include <spine/extension.h> #include <spine/extension.h>
#ifndef SPINE_JSON_DEBUG
/* Define this to a non-zero value to enable extra NULL and expected-character checking */
#define SPINE_JSON_DEBUG 0
#endif
static const char* ep; static const char* ep;
const char* Json_getError (void) { const char* Json_getError (void) {
@ -35,11 +52,23 @@ const char* Json_getError (void) {
} }
static int Json_strcasecmp (const char* s1, const char* s2) { static int Json_strcasecmp (const char* s1, const char* s2) {
if (!s1) return (s1 == s2) ? 0 : 1; /* TODO we may be able to elide these NULL checks if we can prove
if (!s2) return 1; * the graph and input (only callsite is Json_getItem) should not have NULLs
for (; tolower(*s1) == tolower(*s2); ++s1, ++s2) */
if (*s1 == 0) return 0; if ( s1 && s2 )
return tolower(*(const unsigned char*)s1) - tolower(*(const unsigned char*)s2); {
#if defined(_WIN32)
return _stricmp( s1, s2 );
#else
return strcasecmp( s1, s2 );
#endif
}
else
{
if ( s1 < s2 ) return -1; /* s1 is null, s2 is not */
else if ( s1 == s2 ) return 0; /* both are null */
else return 1; /* s2 is null, s1 is not */
}
} }
/* Internal constructor. */ /* Internal constructor. */
@ -62,37 +91,39 @@ void Json_dispose (Json *c) {
/* Parse the input text to generate a number, and populate the result into item. */ /* Parse the input text to generate a number, and populate the result into item. */
static const char* parse_number (Json *item, const char* num) { static const char* parse_number (Json *item, const char* num) {
float n = 0, sign = 1, scale = 0; char * endptr;
int subscale = 0, signsubscale = 1; float n;
/* Could use sscanf for this? */ /* Using strtod and strtof is slightly more permissive than RFC4627,
if (*num == '-') sign = -1, num++; /* Has sign? */ * accepting for example hex-encoded floating point, but either
if (*num == '0') num++; /* is zero */ * is often leagues faster than any manual implementation.
if (*num >= '1' && *num <= '9') do *
n = (n * 10.0f) + (*num++ - '0'); * We also already know that this starts with [-0-9] from parse_value.
while (*num >= '0' && *num <= '9'); /* Number? */ */
if (*num == '.' && num[1] >= '0' && num[1] <= '9') { #if __STDC_VERSION__ >= 199901L
num++; n = strtof( num, &endptr );
do #else
n = (n * 10.0f) + (*num++ - '0'), scale--; n = (float)strtod( num, &endptr );
while (*num >= '0' && *num <= '9'); #endif
} /* Fractional part? */ /* ignore errno's ERANGE, which returns +/-HUGE_VAL */
if (*num == 'e' || *num == 'E') /* Exponent? */ /* n is 0 on any other error */
if ( endptr != num )
{ {
num++; /* Parse success, number found. */
if (*num == '+') item->valueFloat = n;
num++; item->valueInt = (int)n;
else if (*num == '-') signsubscale = -1, num++; /* With sign? */ item->type = Json_Number;
while (*num >= '0' && *num <= '9') return endptr;
subscale = (subscale * 10) + (*num++ - '0'); /* Number? */ }
else
{
/* Parse failure, ep is set. */
ep = num;
return 0;
} }
n = sign * n * (float)pow(10.0f, (scale + subscale * signsubscale)); /* number = +/- number.fraction * 10^+/- exponent */
item->valueFloat = n;
item->valueInt = (int)n;
item->type = Json_Number;
return num;
} }
/* Parse the input text into an unescaped cstring, and populate item. */ /* Parse the input text into an unescaped cstring, and populate item. */
@ -103,7 +134,7 @@ static const char* parse_string (Json *item, const char* str) {
char* out; char* out;
int len = 0; int len = 0;
unsigned uc, uc2; unsigned uc, uc2;
if (*str != '\"') { if (*str != '\"') { /* TODO: don't need this check when called from parse_value, but do need from parse_object */
ep = str; ep = str;
return 0; return 0;
} /* not a string! */ } /* not a string! */
@ -143,6 +174,7 @@ static const char* parse_string (Json *item, const char* str) {
if ((uc >= 0xDC00 && uc <= 0xDFFF) || uc == 0) break; /* check for invalid. */ if ((uc >= 0xDC00 && uc <= 0xDFFF) || uc == 0) break; /* check for invalid. */
/* TODO provide an option to ignore surrogates, use unicode replacement character? */
if (uc >= 0xD800 && uc <= 0xDBFF) /* UTF16 surrogate pairs. */ if (uc >= 0xD800 && uc <= 0xDBFF) /* UTF16 surrogate pairs. */
{ {
if (ptr[1] != '\\' || ptr[2] != 'u') break; /* missing second-half of surrogate. */ if (ptr[1] != '\\' || ptr[2] != 'u') break; /* missing second-half of surrogate. */
@ -164,12 +196,15 @@ static const char* parse_string (Json *item, const char* str) {
case 4: case 4:
*--ptr2 = ((uc | 0x80) & 0xBF); *--ptr2 = ((uc | 0x80) & 0xBF);
uc >>= 6; uc >>= 6;
/* fallthrough */
case 3: case 3:
*--ptr2 = ((uc | 0x80) & 0xBF); *--ptr2 = ((uc | 0x80) & 0xBF);
uc >>= 6; uc >>= 6;
/* fallthrough */
case 2: case 2:
*--ptr2 = ((uc | 0x80) & 0xBF); *--ptr2 = ((uc | 0x80) & 0xBF);
uc >>= 6; uc >>= 6;
/* fallthrough */
case 1: case 1:
*--ptr2 = (uc | firstByteMark[len]); *--ptr2 = (uc | firstByteMark[len]);
} }
@ -183,7 +218,7 @@ static const char* parse_string (Json *item, const char* str) {
} }
} }
*ptr2 = 0; *ptr2 = 0;
if (*ptr == '\"') ptr++; if (*ptr == '\"') ptr++; /* TODO error handling if not \" or \0 ? */
item->valueString = out; item->valueString = out;
item->type = Json_String; item->type = Json_String;
return ptr; return ptr;
@ -196,20 +231,22 @@ static const char* parse_object (Json *item, const char* value);
/* Utility to jump whitespace and cr/lf */ /* Utility to jump whitespace and cr/lf */
static const char* skip (const char* in) { static const char* skip (const char* in) {
while (in && *in && (unsigned char)*in <= 32) if ( !in ) return 0; /* must propagate NULL since it's often called in skip(f(...)) form */
while (*in && (unsigned char)*in <= 32)
in++; in++;
return in; return in;
} }
/* Parse an object - create a new root, and populate. */ /* Parse an object - create a new root, and populate. */
Json *Json_create (const char* value) { Json *Json_create (const char* value) {
const char* end = 0; Json *c;
Json *c = Json_new();
ep = 0; ep = 0;
if (!value) return 0; /* only place we check for NULL other than skip() */
c = Json_new();
if (!c) return 0; /* memory fail */ if (!c) return 0; /* memory fail */
end = parse_value(c, skip(value)); value = parse_value(c, skip(value));
if (!end) { if (!value) {
Json_dispose(c); Json_dispose(c);
return 0; return 0;
} /* parse failure. ep is set. */ } /* parse failure. ep is set. */
@ -219,31 +256,60 @@ Json *Json_create (const char* value) {
/* Parser core - when encountering text, process appropriately. */ /* Parser core - when encountering text, process appropriately. */
static const char* parse_value (Json *item, const char* value) { static const char* parse_value (Json *item, const char* value) {
/* Referenced by Json_create(), parse_array(), and parse_object(). */
/* Always called with the result of skip(). */
#if SPINE_JSON_DEBUG /* Checked at entry to graph, Json_create, and after every parse_ call. */
if (!value) return 0; /* Fail on null. */ if (!value) return 0; /* Fail on null. */
if (!strncmp(value, "null", 4)) { #endif
item->type = Json_NULL;
return value + 4; switch ( *value )
} {
if (!strncmp(value, "false", 5)) { case 'n':
item->type = Json_False; {
return value + 5; if (!strncmp(value+1, "ull", 3)) {
} item->type = Json_NULL;
if (!strncmp(value, "true", 4)) { return value + 4;
item->type = Json_True; }
item->valueInt = 1; break;
return value + 4; }
} case 'f':
if (*value == '\"') { {
return parse_string(item, value); if (!strncmp(value+1, "alse", 4)) {
} item->type = Json_False;
if (*value == '-' || (*value >= '0' && *value <= '9')) { /* calloc prevents us needing item->type = Json_False or valueInt = 0 here */
return parse_number(item, value); return value + 5;
} }
if (*value == '[') { break;
return parse_array(item, value); }
} case 't':
if (*value == '{') { {
return parse_object(item, value); if (!strncmp(value+1, "rue", 3)) {
item->type = Json_True;
item->valueInt = 1;
return value + 4;
}
break;
}
case '\"':
return parse_string(item, value);
case '[':
return parse_array(item, value);
case '{':
return parse_object(item, value);
case '-': /* fallthrough */
case '0': /* fallthrough */
case '1': /* fallthrough */
case '2': /* fallthrough */
case '3': /* fallthrough */
case '4': /* fallthrough */
case '5': /* fallthrough */
case '6': /* fallthrough */
case '7': /* fallthrough */
case '8': /* fallthrough */
case '9':
return parse_number(item, value);
default:
break;
} }
ep = value; ep = value;
@ -253,10 +319,13 @@ static const char* parse_value (Json *item, const char* value) {
/* Build an array from input text. */ /* Build an array from input text. */
static const char* parse_array (Json *item, const char* value) { static const char* parse_array (Json *item, const char* value) {
Json *child; Json *child;
#if SPINE_JSON_DEBUG /* unnecessary, only callsite (parse_value) verifies this */
if (*value != '[') { if (*value != '[') {
ep = value; ep = value;
return 0; return 0;
} /* not an array! */ } /* not an array! */
#endif
item->type = Json_Array; item->type = Json_Array;
value = skip(value + 1); value = skip(value + 1);
@ -269,13 +338,15 @@ static const char* parse_array (Json *item, const char* value) {
item->size = 1; item->size = 1;
while (*value == ',') { while (*value == ',') {
Json *new_item; Json *new_item = Json_new();
if (!(new_item = Json_new())) return 0; /* memory fail */ if (!new_item) return 0; /* memory fail */
child->next = new_item; child->next = new_item;
#if SPINE_JSON_HAVE_PREV
new_item->prev = child; new_item->prev = child;
#endif
child = new_item; child = new_item;
value = skip(parse_value(child, skip(value + 1))); value = skip(parse_value(child, skip(value + 1)));
if (!value) return 0; /* memory fail */ if (!value) return 0; /* parse fail */
item->size++; item->size++;
} }
@ -287,10 +358,13 @@ static const char* parse_array (Json *item, const char* value) {
/* Build an object from the text. */ /* Build an object from the text. */
static const char* parse_object (Json *item, const char* value) { static const char* parse_object (Json *item, const char* value) {
Json *child; Json *child;
#if SPINE_JSON_DEBUG /* unnecessary, only callsite (parse_value) verifies this */
if (*value != '{') { if (*value != '{') {
ep = value; ep = value;
return 0; return 0;
} /* not an object! */ } /* not an object! */
#endif
item->type = Json_Object; item->type = Json_Object;
value = skip(value + 1); value = skip(value + 1);
@ -311,10 +385,12 @@ static const char* parse_object (Json *item, const char* value) {
item->size = 1; item->size = 1;
while (*value == ',') { while (*value == ',') {
Json *new_item; Json *new_item = Json_new();
if (!(new_item = Json_new())) return 0; /* memory fail */ if (!new_item) return 0; /* memory fail */
child->next = new_item; child->next = new_item;
#if SPINE_JSON_HAVE_PREV
new_item->prev = child; new_item->prev = child;
#endif
child = new_item; child = new_item;
value = skip(parse_string(child, skip(value + 1))); value = skip(parse_string(child, skip(value + 1)));
if (!value) return 0; if (!value) return 0;

View File

@ -38,10 +38,17 @@ extern "C" {
#define Json_Array 5 #define Json_Array 5
#define Json_Object 6 #define Json_Object 6
#ifndef SPINE_JSON_HAVE_PREV
/* Spine doesn't use the "prev" link in the Json sibling lists. */
#define SPINE_JSON_HAVE_PREV 0
#endif
/* The Json structure: */ /* The Json structure: */
typedef struct Json { typedef struct Json {
struct Json* next; struct Json* next;
#if SPINE_JSON_HAVE_PREV
struct Json* prev; /* next/prev allow you to walk array/object chains. Alternatively, use getSize/getItem */ struct Json* prev; /* next/prev allow you to walk array/object chains. Alternatively, use getSize/getItem */
#endif
struct Json* child; /* An array or object item will have a child pointer pointing to a chain of the items in the array/object. */ struct Json* child; /* An array or object item will have a child pointer pointing to a chain of the items in the array/object. */
int type; /* The type of the item, as above. */ int type; /* The type of the item, as above. */