From 1135bf8a3ce084beb3285a4e0a842495beed0e1f Mon Sep 17 00:00:00 2001 From: Leander Hasty Date: Fri, 20 Jun 2014 11:46:40 -0400 Subject: [PATCH] Json parser optimizations for spine-c. Various optimizations for Json.c and Json.h. parse_value uses a switch on starting character and avoids three strncmp calls in many cases. parse_number dispatches to strtod (C89) or strtof (C99), which is a slightly more permissive superset of JSON's RFC4627 number parsing, but is also quite a bit faster. More explicit error handling added here too. (The parse_value and parse_number changes alone made for a ~1.5x speedup on our tiny ARM926 platform using uclibc 0.9.30 and gcc 4.3.3.) Use _stricmp or strcasecmp in Json_strcasecmp. This one has a tricky #include issue; it's in <strings.h> in some systems and <string.h> in others. Defining _DEFAULT_SOURCE and/or _BSD_SOURCE pulls it into <string.h> in all of the systems we could find. This is a noticeable win, but less than parse_value or parse_number. Elide NULL checks in almost all functions, insert one during Json_create. Everywhere else, the code already explicitly checks and returns. skip() keeps a NULL check but pushes it outside the loop, so we only check it on entry to the method. Also elide some redundant starting character checks for parse_object ('{') and parse_array ('[') since they're only called from parse_value and we know the character is correct. There's an opportunity to do this also for parse_string, but it's a bit more complicated as it's called from parse_object as well as parse_value. For all these elided checks, allow them to reappear if SPINE_JSON_DEBUG is defined and nonzero. Spine doesn't use the "prev" field ever, so drop it entirely from the Json structure unless someone defines SPINE_JSON_HAVE_PREV non-zero. Pull some of the assignments out of conditionals to placate some of the higher-warning-level compilers. 
A few TODO comments left near some of the other speedup opportunities, as well as one existing place that might need additional error handling. (Tested on various x86 and ARM gcc versions, Xcode 5.x series, Visual Studio 2010 SP1, ARMCC 5.04, and MWCCARM 4.0. Holler if this should be decomposed into a patch series, if there are any suggestions or idiom changes, or a different submission method we should be using.) --- spine-c/src/spine/Json.c | 212 ++++++++++++++++++++++++++------------- spine-c/src/spine/Json.h | 7 ++ 2 files changed, 151 insertions(+), 68 deletions(-) diff --git a/spine-c/src/spine/Json.c b/spine-c/src/spine/Json.c index 3a467accf..da13a1201 100644 --- a/spine-c/src/spine/Json.c +++ b/spine-c/src/spine/Json.c @@ -23,11 +23,28 @@ /* Json */ /* JSON parser in C. */ +#ifndef _DEFAULT_SOURCE +/* Bring strings.h definitions into string.h, where appropriate */ +#define _DEFAULT_SOURCE +#endif + +#ifndef _BSD_SOURCE +/* Bring strings.h definitions into string.h, where appropriate */ +#define _BSD_SOURCE +#endif + #include "Json.h" #include #include +#include <stdlib.h> /* strtod (C89), strtof (C99) */ +#include <string.h> /* strcasecmp (4.4BSD - compatibility), _stricmp (_WIN32) */ #include +#ifndef SPINE_JSON_DEBUG +/* Define this to do extra NULL and expected-character checking */ +#define SPINE_JSON_DEBUG 0 +#endif + static const char* ep; const char* Json_getError (void) { @@ -35,11 +52,23 @@ const char* Json_getError (void) { } static int Json_strcasecmp (const char* s1, const char* s2) { - if (!s1) return (s1 == s2) ? 
0 : 1; - if (!s2) return 1; - for (; tolower(*s1) == tolower(*s2); ++s1, ++s2) - if (*s1 == 0) return 0; - return tolower(*(const unsigned char*)s1) - tolower(*(const unsigned char*)s2); + /* TODO we may be able to elide these NULL checks if we can prove + * the graph and input (only callsite is Json_getItem) should not have NULLs + */ + if ( s1 && s2 ) + { +#if defined(_WIN32) + return _stricmp( s1, s2 ); +#else + return strcasecmp( s1, s2 ); +#endif + } + else + { + if ( s1 < s2 ) return -1; /* s1 is null, s2 is not */ + else if ( s1 == s2 ) return 0; /* both are null */ + else return 1; /* s2 is null, s1 is not */ + } } /* Internal constructor. */ @@ -62,37 +91,39 @@ void Json_dispose (Json *c) { /* Parse the input text to generate a number, and populate the result into item. */ static const char* parse_number (Json *item, const char* num) { - float n = 0, sign = 1, scale = 0; - int subscale = 0, signsubscale = 1; + char * endptr; + float n; - /* Could use sscanf for this? */ - if (*num == '-') sign = -1, num++; /* Has sign? */ - if (*num == '0') num++; /* is zero */ - if (*num >= '1' && *num <= '9') do - n = (n * 10.0f) + (*num++ - '0'); - while (*num >= '0' && *num <= '9'); /* Number? */ - if (*num == '.' && num[1] >= '0' && num[1] <= '9') { - num++; - do - n = (n * 10.0f) + (*num++ - '0'), scale--; - while (*num >= '0' && *num <= '9'); - } /* Fractional part? */ - if (*num == 'e' || *num == 'E') /* Exponent? */ + /* Using strtod and strtof is slightly more permissive than RFC4627, + * accepting for example hex-encoded floating point, but either + * is often leagues faster than any manual implementation. + * + * We also already know that this starts with [-0-9] from parse_value. 
+ */ +#if __STDC_VERSION__ >= 199901L + n = strtof( num, &endptr ); +#else + n = (float)strtod( num, &endptr ); +#endif + /* ignore errno's ERANGE, which returns +/-HUGE_VAL */ + /* n is 0 on any other error */ + + + if ( endptr != num ) { - num++; - if (*num == '+') - num++; - else if (*num == '-') signsubscale = -1, num++; /* With sign? */ - while (*num >= '0' && *num <= '9') - subscale = (subscale * 10) + (*num++ - '0'); /* Number? */ + /* Parse success, number found. */ + item->valueFloat = n; + item->valueInt = (int)n; + item->type = Json_Number; + return endptr; + } + else + { + /* Parse failure, ep is set. */ + ep = num; + return 0; } - n = sign * n * (float)pow(10.0f, (scale + subscale * signsubscale)); /* number = +/- number.fraction * 10^+/- exponent */ - - item->valueFloat = n; - item->valueInt = (int)n; - item->type = Json_Number; - return num; } /* Parse the input text into an unescaped cstring, and populate item. */ @@ -103,7 +134,7 @@ static const char* parse_string (Json *item, const char* str) { char* out; int len = 0; unsigned uc, uc2; - if (*str != '\"') { + if (*str != '\"') { /* TODO: don't need this check when called from parse_value, but do need from parse_object */ ep = str; return 0; } /* not a string! */ @@ -143,6 +174,7 @@ static const char* parse_string (Json *item, const char* str) { if ((uc >= 0xDC00 && uc <= 0xDFFF) || uc == 0) break; /* check for invalid. */ + /* TODO provide an option to ignore surrogates, use unicode replacement character? */ if (uc >= 0xD800 && uc <= 0xDBFF) /* UTF16 surrogate pairs. */ { if (ptr[1] != '\\' || ptr[2] != 'u') break; /* missing second-half of surrogate. 
*/ @@ -164,12 +196,15 @@ static const char* parse_string (Json *item, const char* str) { case 4: *--ptr2 = ((uc | 0x80) & 0xBF); uc >>= 6; + /* fallthrough */ case 3: *--ptr2 = ((uc | 0x80) & 0xBF); uc >>= 6; + /* fallthrough */ case 2: *--ptr2 = ((uc | 0x80) & 0xBF); uc >>= 6; + /* fallthrough */ case 1: *--ptr2 = (uc | firstByteMark[len]); } @@ -183,7 +218,7 @@ static const char* parse_string (Json *item, const char* str) { } } *ptr2 = 0; - if (*ptr == '\"') ptr++; + if (*ptr == '\"') ptr++; /* TODO error handling if not \" or \0 ? */ item->valueString = out; item->type = Json_String; return ptr; @@ -196,20 +231,22 @@ static const char* parse_object (Json *item, const char* value); /* Utility to jump whitespace and cr/lf */ static const char* skip (const char* in) { - while (in && *in && (unsigned char)*in <= 32) + if ( !in ) return 0; /* must propagate NULL since it's often called in skip(f(...)) form */ + while (*in && (unsigned char)*in <= 32) in++; return in; } /* Parse an object - create a new root, and populate. */ Json *Json_create (const char* value) { - const char* end = 0; - Json *c = Json_new(); + Json *c; ep = 0; + if (!value) return 0; /* only place we check for NULL other than skip() */ + c = Json_new(); if (!c) return 0; /* memory fail */ - end = parse_value(c, skip(value)); - if (!end) { + value = parse_value(c, skip(value)); + if (!value) { Json_dispose(c); return 0; } /* parse failure. ep is set. */ @@ -219,31 +256,60 @@ Json *Json_create (const char* value) { /* Parser core - when encountering text, process appropriately. */ static const char* parse_value (Json *item, const char* value) { + /* Referenced by Json_create(), parse_array(), and parse_object(). */ + /* Always called with the result of skip(). */ +#if SPINE_JSON_DEBUG /* Checked at entry to graph, Json_create, and after every parse_ call. */ if (!value) return 0; /* Fail on null. 
*/ - if (!strncmp(value, "null", 4)) { - item->type = Json_NULL; - return value + 4; - } - if (!strncmp(value, "false", 5)) { - item->type = Json_False; - return value + 5; - } - if (!strncmp(value, "true", 4)) { - item->type = Json_True; - item->valueInt = 1; - return value + 4; - } - if (*value == '\"') { - return parse_string(item, value); - } - if (*value == '-' || (*value >= '0' && *value <= '9')) { - return parse_number(item, value); - } - if (*value == '[') { - return parse_array(item, value); - } - if (*value == '{') { - return parse_object(item, value); +#endif + + switch ( *value ) + { + case 'n': + { + if (!strncmp(value+1, "ull", 3)) { + item->type = Json_NULL; + return value + 4; + } + break; + } + case 'f': + { + if (!strncmp(value+1, "alse", 4)) { + item->type = Json_False; + /* calloc prevents us needing item->type = Json_False or valueInt = 0 here */ + return value + 5; + } + break; + } + case 't': + { + if (!strncmp(value+1, "rue", 3)) { + item->type = Json_True; + item->valueInt = 1; + return value + 4; + } + break; + } + case '\"': + return parse_string(item, value); + case '[': + return parse_array(item, value); + case '{': + return parse_object(item, value); + case '-': /* fallthrough */ + case '0': /* fallthrough */ + case '1': /* fallthrough */ + case '2': /* fallthrough */ + case '3': /* fallthrough */ + case '4': /* fallthrough */ + case '5': /* fallthrough */ + case '6': /* fallthrough */ + case '7': /* fallthrough */ + case '8': /* fallthrough */ + case '9': + return parse_number(item, value); + default: + break; } ep = value; @@ -253,10 +319,13 @@ static const char* parse_value (Json *item, const char* value) { /* Build an array from input text. */ static const char* parse_array (Json *item, const char* value) { Json *child; + +#if SPINE_JSON_DEBUG /* unnecessary, only callsite (parse_value) verifies this */ if (*value != '[') { ep = value; return 0; } /* not an array! 
*/ +#endif item->type = Json_Array; value = skip(value + 1); @@ -269,13 +338,15 @@ static const char* parse_array (Json *item, const char* value) { item->size = 1; while (*value == ',') { - Json *new_item; - if (!(new_item = Json_new())) return 0; /* memory fail */ + Json *new_item = Json_new(); + if (!new_item) return 0; /* memory fail */ child->next = new_item; +#if SPINE_JSON_HAVE_PREV new_item->prev = child; +#endif child = new_item; value = skip(parse_value(child, skip(value + 1))); - if (!value) return 0; /* memory fail */ + if (!value) return 0; /* parse fail */ item->size++; } @@ -287,10 +358,13 @@ static const char* parse_array (Json *item, const char* value) { /* Build an object from the text. */ static const char* parse_object (Json *item, const char* value) { Json *child; + +#if SPINE_JSON_DEBUG /* unnecessary, only callsite (parse_value) verifies this */ if (*value != '{') { ep = value; return 0; } /* not an object! */ +#endif item->type = Json_Object; value = skip(value + 1); @@ -311,10 +385,12 @@ static const char* parse_object (Json *item, const char* value) { item->size = 1; while (*value == ',') { - Json *new_item; - if (!(new_item = Json_new())) return 0; /* memory fail */ + Json *new_item = Json_new(); + if (!new_item) return 0; /* memory fail */ child->next = new_item; +#if SPINE_JSON_HAVE_PREV new_item->prev = child; +#endif child = new_item; value = skip(parse_string(child, skip(value + 1))); if (!value) return 0; diff --git a/spine-c/src/spine/Json.h b/spine-c/src/spine/Json.h index bc11a81ac..bf72dc486 100644 --- a/spine-c/src/spine/Json.h +++ b/spine-c/src/spine/Json.h @@ -38,10 +38,17 @@ extern "C" { #define Json_Array 5 #define Json_Object 6 +#ifndef SPINE_JSON_HAVE_PREV +/* Spine doesn't use the "prev" link in the Json sibling lists. 
*/ +#define SPINE_JSON_HAVE_PREV 0 +#endif + /* The Json structure: */ typedef struct Json { struct Json* next; +#if SPINE_JSON_HAVE_PREV struct Json* prev; /* next/prev allow you to walk array/object chains. Alternatively, use getSize/getItem */ +#endif struct Json* child; /* An array or object item will have a child pointer pointing to a chain of the items in the array/object. */ int type; /* The type of the item, as above. */