reader.h 78 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879
  1. // Tencent is pleased to support the open source community by making RapidJSON available.
  2. //
  3. // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
  4. //
  5. // Licensed under the MIT License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // http://opensource.org/licenses/MIT
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #ifndef RAPIDJSON_READER_H_
  15. #define RAPIDJSON_READER_H_
  16. /*! \file reader.h */
  17. #include "allocators.h"
  18. #include "stream.h"
  19. #include "encodedstream.h"
  20. #include "internal/meta.h"
  21. #include "internal/stack.h"
  22. #include "internal/strtod.h"
  23. #include <limits>
  24. #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
  25. #include <intrin.h>
  26. #pragma intrinsic(_BitScanForward)
  27. #endif
  28. #ifdef RAPIDJSON_SSE42
  29. #include <nmmintrin.h>
  30. #elif defined(RAPIDJSON_SSE2)
  31. #include <emmintrin.h>
  32. #endif
  33. #ifdef _MSC_VER
  34. RAPIDJSON_DIAG_PUSH
  35. RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
  36. RAPIDJSON_DIAG_OFF(4702) // unreachable code
  37. #endif
  38. #ifdef __clang__
  39. RAPIDJSON_DIAG_PUSH
  40. RAPIDJSON_DIAG_OFF(old-style-cast)
  41. RAPIDJSON_DIAG_OFF(padded)
  42. RAPIDJSON_DIAG_OFF(switch-enum)
  43. #endif
  44. #ifdef __GNUC__
  45. RAPIDJSON_DIAG_PUSH
  46. RAPIDJSON_DIAG_OFF(effc++)
  47. #endif
  48. //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
  49. #define RAPIDJSON_NOTHING /* deliberately empty */
  // Leaves the enclosing function, returning `value`, once HasParseError() is true.
  // Guarded by #ifndef so that a definition supplied earlier is kept instead.
  50. #ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
  51. #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
  52. RAPIDJSON_MULTILINEMACRO_BEGIN \
  53. if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \
  54. RAPIDJSON_MULTILINEMACRO_END
  55. #endif
  // Same check for functions returning void: the return value expands to nothing.
  56. #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
  57. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)
  58. //!@endcond
  59. /*! \def RAPIDJSON_PARSE_ERROR_NORETURN
  60. \ingroup RAPIDJSON_ERRORS
  61. \brief Macro to indicate a parse error.
  62. \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
  63. \param offset position of the error in JSON input (\c size_t)
  64. This macro can be used as a customization point for the internal
  65. error handling mechanism of RapidJSON.
  66. A common usage model is to throw an exception instead of requiring the
  67. caller to explicitly check the \ref rapidjson::GenericReader::Parse's
  68. return value:
  69. \code
  70. #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \
  71. throw ParseException(parseErrorCode, #parseErrorCode, offset)
  72. #include <stdexcept> // std::runtime_error
  73. #include "rapidjson/error/error.h" // rapidjson::ParseResult
  74. struct ParseException : std::runtime_error, rapidjson::ParseResult {
  75. ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset)
  76. : std::runtime_error(msg), ParseResult(code, offset) {}
  77. };
  78. #include "rapidjson/reader.h"
  79. \endcode
  80. \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse
  81. */
  82. #ifndef RAPIDJSON_PARSE_ERROR_NORETURN
  83. #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
  84. RAPIDJSON_MULTILINEMACRO_BEGIN \
  85. RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
  86. SetParseError(parseErrorCode, offset); \
  87. RAPIDJSON_MULTILINEMACRO_END
  88. #endif
  89. /*! \def RAPIDJSON_PARSE_ERROR
  90. \ingroup RAPIDJSON_ERRORS
  91. \brief (Internal) macro to indicate and handle a parse error.
  92. \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
  93. \param offset position of the error in JSON input (\c size_t)
  94. Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing.
  95. \see RAPIDJSON_PARSE_ERROR_NORETURN
  96. \hideinitializer
  97. */
  98. #ifndef RAPIDJSON_PARSE_ERROR
  99. #define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
  100. RAPIDJSON_MULTILINEMACRO_BEGIN \
  101. RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
  102. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
  103. RAPIDJSON_MULTILINEMACRO_END
  104. #endif
  105. #include "error/error.h" // ParseErrorCode, ParseResult
  106. RAPIDJSON_NAMESPACE_BEGIN
  107. ///////////////////////////////////////////////////////////////////////////////
  108. // ParseFlag
  109. /*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS
  110. \ingroup RAPIDJSON_CONFIG
  111. \brief User-defined kParseDefaultFlags definition.
  112. User can define this as any \c ParseFlag combinations.
  113. */
  #ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS
  #define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
  #endif

  //! Bit flags controlling the parser; individual flags may be OR-ed together.
  /*! \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
  */
  enum ParseFlag {
      kParseNoFlags              = 0,       //!< No flags are set.
      kParseInsituFlag           = 1 << 0,  //!< In-situ(destructive) parsing.
      kParseValidateEncodingFlag = 1 << 1,  //!< Validate encoding of JSON strings.
      kParseIterativeFlag        = 1 << 2,  //!< Iterative(constant complexity in terms of function call stack size) parsing.
      kParseStopWhenDoneFlag     = 1 << 3,  //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
      kParseFullPrecisionFlag    = 1 << 4,  //!< Parse number in full precision (but slower).
      kParseCommentsFlag         = 1 << 5,  //!< Allow one-line (//) and multi-line (/**/) comments.
      kParseNumbersAsStringsFlag = 1 << 6,  //!< Parse all numbers (ints/doubles) as strings.
      kParseTrailingCommasFlag   = 1 << 7,  //!< Allow trailing commas at the end of objects and arrays.
      kParseNanAndInfFlag        = 1 << 8,  //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
      kParseDefaultFlags         = RAPIDJSON_PARSE_DEFAULT_FLAGS  //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
  };
  133. ///////////////////////////////////////////////////////////////////////////////
  134. // Handler
  135. /*! \class rapidjson::Handler
  136. \brief Concept for receiving events from GenericReader upon parsing.
  137. The functions return true if no error occurs. If they return false,
  138. the event publisher should terminate the process.
  139. \code
  140. concept Handler {
  141. typename Ch;
  142. bool Null();
  143. bool Bool(bool b);
  144. bool Int(int i);
  145. bool Uint(unsigned i);
  146. bool Int64(int64_t i);
  147. bool Uint64(uint64_t i);
  148. bool Double(double d);
  149. /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
  150. bool RawNumber(const Ch* str, SizeType length, bool copy);
  151. bool String(const Ch* str, SizeType length, bool copy);
  152. bool StartObject();
  153. bool Key(const Ch* str, SizeType length, bool copy);
  154. bool EndObject(SizeType memberCount);
  155. bool StartArray();
  156. bool EndArray(SizeType elementCount);
  157. };
  158. \endcode
  159. */
  160. ///////////////////////////////////////////////////////////////////////////////
  161. // BaseReaderHandler
  162. //! Default implementation of Handler.
  163. /*! This can be used as base class of any reader handler.
  164. \note implements Handler concept
  165. */
  166. template<typename Encoding = UTF8<>, typename Derived = void>
  167. struct BaseReaderHandler {
  168. typedef typename Encoding::Ch Ch;
  169. typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override;
  170. bool Default() { return true; }
  171. bool Null() { return static_cast<Override&>(*this).Default(); }
  172. bool Bool(bool) { return static_cast<Override&>(*this).Default(); }
  173. bool Int(int) { return static_cast<Override&>(*this).Default(); }
  174. bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); }
  175. bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); }
  176. bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); }
  177. bool Double(double) { return static_cast<Override&>(*this).Default(); }
  178. /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
  179. bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
  180. bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); }
  181. bool StartObject() { return static_cast<Override&>(*this).Default(); }
  182. bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
  183. bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); }
  184. bool StartArray() { return static_cast<Override&>(*this).Default(); }
  185. bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); }
  186. };
  187. ///////////////////////////////////////////////////////////////////////////////
  188. // StreamLocalCopy
  189. namespace internal {
  190. template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
  191. class StreamLocalCopy;
  192. //! Do copy optimization.
  193. template<typename Stream>
  194. class StreamLocalCopy<Stream, 1> {
  195. public:
  196. StreamLocalCopy(Stream& original) : s(original), original_(original) {}
  197. ~StreamLocalCopy() { original_ = s; }
  198. Stream s;
  199. private:
  200. StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
  201. Stream& original_;
  202. };
  203. //! Keep reference.
  204. template<typename Stream>
  205. class StreamLocalCopy<Stream, 0> {
  206. public:
  207. StreamLocalCopy(Stream& original) : s(original) {}
  208. Stream& s;
  209. private:
  210. StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
  211. };
  212. } // namespace internal
  213. ///////////////////////////////////////////////////////////////////////////////
  214. // SkipWhitespace
  215. //! Skip the JSON white spaces in a stream.
  216. /*! \param is A input stream for skipping white spaces.
  217. \note This function has SSE2/SSE4.2 specialization.
  218. */
  219. template<typename InputStream>
  220. void SkipWhitespace(InputStream& is) {
  221. internal::StreamLocalCopy<InputStream> copy(is);
  222. InputStream& s(copy.s);
  223. typename InputStream::Ch c;
  224. while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t')
  225. s.Take();
  226. }
  //! Skip JSON white spaces in the character range [p, end).
  /*! \param p   First character to examine.
      \param end One past the last character that may be examined.
      \return Pointer to the first non-whitespace character, or \c end if there is none.
  */
  inline const char* SkipWhitespace(const char* p, const char* end) {
      for (; p != end; ++p) {
          const char ch = *p;
          if (ch != ' ' && ch != '\n' && ch != '\r' && ch != '\t')
              break;
      }
      return p;
  }
  232. #ifdef RAPIDJSON_SSE42
  //! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-bit characters at once.
  /*! \param p Start of the text. There is no end pointer: scanning only stops at the
               first character that is not a space, line feed, carriage return or tab.
      \return Pointer to that first non-whitespace character.
  */
  inline const char *SkipWhitespace_SIMD(const char* p) {
      // Fast return when the very first character is not whitespace
      if (!(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
          return p;
      ++p;

      // Scan one character at a time up to the next 16-byte boundary
      const char* const boundary = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
      for (; p != boundary; ++p) {
          if (!(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
              return p;
      }

      // The rest of string, one 16-byte block per iteration
      static const char whitespace[16] = " \n\r\t";
      const __m128i wsSet = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));

      for (;;) {
          const __m128i block = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
          const int nonWs = _mm_cvtsi128_si32(_mm_cmpistrm(wsSet, block, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
          if (nonWs == 0) {   // whole block is whitespace
              p += 16;
              continue;
          }
          // The lowest set bit is the index of the first non-whitespace character
  #ifdef _MSC_VER
          unsigned long offset;
          _BitScanForward(&offset, nonWs);
          return p + offset;
  #else
          return p + __builtin_ffs(nonWs) - 1;
  #endif
      }
  }
  264. inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
  265. // Fast return for single non-whitespace
  266. if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
  267. ++p;
  268. else
  269. return p;
  270. // The middle of string using SIMD
  271. static const char whitespace[16] = " \n\r\t";
  272. const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
  273. for (; p <= end - 16; p += 16) {
  274. const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
  275. const int r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
  276. if (r != 0) { // some of characters is non-whitespace
  277. #ifdef _MSC_VER // Find the index of first non-whitespace
  278. unsigned long offset;
  279. _BitScanForward(&offset, r);
  280. return p + offset;
  281. #else
  282. return p + __builtin_ffs(r) - 1;
  283. #endif
  284. }
  285. }
  286. return SkipWhitespace(p, end);
  287. }
  288. #elif defined(RAPIDJSON_SSE2)
  //! Skip whitespace with SSE2 instructions, testing 16 8-bit characters at once.
  /*! \param p Start of the text. There is no end pointer: scanning only stops at the
               first character that is not a space, line feed, carriage return or tab.
      \return Pointer to that first non-whitespace character.
  */
  inline const char *SkipWhitespace_SIMD(const char* p) {
      // Fast return when the very first character is not whitespace
      if (!(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
          return p;
      ++p;

      // Scan one character at a time up to the next 16-byte boundary
      const char* const boundary = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
      for (; p != boundary; ++p) {
          if (!(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
              return p;
      }

      // One register per whitespace character, each holding 16 copies of it
  #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
      static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
  #undef C16
      const __m128i space    = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
      const __m128i lineFeed = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
      const __m128i carriage = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
      const __m128i tab      = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));

      // The rest of string, one 16-byte block per iteration
      for (;;) {
          const __m128i block = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
          const __m128i isWs = _mm_or_si128(
              _mm_or_si128(_mm_cmpeq_epi8(block, space), _mm_cmpeq_epi8(block, lineFeed)),
              _mm_or_si128(_mm_cmpeq_epi8(block, carriage), _mm_cmpeq_epi8(block, tab)));
          // Inverted mask: a set bit marks a character that is not whitespace
          const unsigned short nonWs = static_cast<unsigned short>(~_mm_movemask_epi8(isWs));
          if (nonWs == 0) {   // whole block is whitespace
              p += 16;
              continue;
          }
          // The lowest set bit is the index of the first non-whitespace character
  #ifdef _MSC_VER
          unsigned long offset;
          _BitScanForward(&offset, nonWs);
          return p + offset;
  #else
          return p + __builtin_ffs(nonWs) - 1;
  #endif
      }
  }
  329. inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
  330. // Fast return for single non-whitespace
  331. if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
  332. ++p;
  333. else
  334. return p;
  335. // The rest of string
  336. #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
  337. static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
  338. #undef C16
  339. const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
  340. const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
  341. const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
  342. const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
  343. for (; p <= end - 16; p += 16) {
  344. const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
  345. __m128i x = _mm_cmpeq_epi8(s, w0);
  346. x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
  347. x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
  348. x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
  349. unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
  350. if (r != 0) { // some of characters may be non-whitespace
  351. #ifdef _MSC_VER // Find the index of first non-whitespace
  352. unsigned long offset;
  353. _BitScanForward(&offset, r);
  354. return p + offset;
  355. #else
  356. return p + __builtin_ffs(r) - 1;
  357. #endif
  358. }
  359. }
  360. return SkipWhitespace(p, end);
  361. }
  362. #endif // RAPIDJSON_SSE2
  363. #ifdef RAPIDJSON_SIMD
  364. //! Template function specialization for InsituStringStream
  365. template<> inline void SkipWhitespace(InsituStringStream& is) {
  366. is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_));
  367. }
  368. //! Template function specialization for StringStream
  369. template<> inline void SkipWhitespace(StringStream& is) {
  370. is.src_ = SkipWhitespace_SIMD(is.src_);
  371. }
  372. template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) {
  373. is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_);
  374. }
  375. #endif // RAPIDJSON_SIMD
  376. ///////////////////////////////////////////////////////////////////////////////
  377. // GenericReader
  378. //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator.
  379. /*! GenericReader parses JSON text from a stream, and send events synchronously to an
  380. object implementing Handler concept.
  381. It needs to allocate a stack for storing a single decoded string during
  382. non-destructive parsing.
  383. For in-situ parsing, the decoded string is directly written to the source
  384. text string, no temporary buffer is required.
  385. A GenericReader object can be reused for parsing multiple JSON text.
  386. \tparam SourceEncoding Encoding of the input stream.
  387. \tparam TargetEncoding Encoding of the parse output.
  388. \tparam StackAllocator Allocator type for stack.
  389. */
  390. template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator>
  391. class GenericReader {
  392. public:
  393. typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
  394. //! Constructor.
  395. /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
  396. \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing)
  397. */
  398. GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(stackAllocator, stackCapacity), parseResult_() {}
  399. //! Parse JSON text.
  400. /*! \tparam parseFlags Combination of \ref ParseFlag.
  401. \tparam InputStream Type of input stream, implementing Stream concept.
  402. \tparam Handler Type of handler, implementing Handler concept.
  403. \param is Input stream to be parsed.
  404. \param handler The handler to receive events.
  405. \return Whether the parsing is successful.
  406. */
  407. template <unsigned parseFlags, typename InputStream, typename Handler>
  408. ParseResult Parse(InputStream& is, Handler& handler) {
  409. if (parseFlags & kParseIterativeFlag)
  410. return IterativeParse<parseFlags>(is, handler);
  411. parseResult_.Clear();
  412. ClearStackOnExit scope(*this);
  413. SkipWhitespaceAndComments<parseFlags>(is);
  414. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  415. if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) {
  416. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell());
  417. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  418. }
  419. else {
  420. ParseValue<parseFlags>(is, handler);
  421. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  422. if (!(parseFlags & kParseStopWhenDoneFlag)) {
  423. SkipWhitespaceAndComments<parseFlags>(is);
  424. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  425. if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) {
  426. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
  427. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  428. }
  429. }
  430. }
  431. return parseResult_;
  432. }
  433. //! Parse JSON text (with \ref kParseDefaultFlags)
  434. /*! \tparam InputStream Type of input stream, implementing Stream concept
  435. \tparam Handler Type of handler, implementing Handler concept.
  436. \param is Input stream to be parsed.
  437. \param handler The handler to receive events.
  438. \return Whether the parsing is successful.
  439. */
  440. template <typename InputStream, typename Handler>
  441. ParseResult Parse(InputStream& is, Handler& handler) {
  442. return Parse<kParseDefaultFlags>(is, handler);
  443. }
  444. //! Whether a parse error has occurred in the last parsing (forwards to parseResult_.IsError()).
  445. bool HasParseError() const { return parseResult_.IsError(); }
  446. //! Get the \ref ParseErrorCode of last parsing (forwards to parseResult_.Code()).
  447. ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
  448. //! Get the position of last parsing error in input, 0 otherwise (forwards to parseResult_.Offset()).
  449. size_t GetErrorOffset() const { return parseResult_.Offset(); }
  450. protected:
  // Stores the error code and input offset in parseResult_; this is the call that
  // the default RAPIDJSON_PARSE_ERROR_NORETURN macro expands to.
  451. void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); }
  452. private:
  453. // Prohibit copy constructor & assignment operator (declared private, no body given here).
  454. GenericReader(const GenericReader&);
  455. GenericReader& operator=(const GenericReader&);
  // Calls stack_.Clear(); invoked from the destructor of ClearStackOnExit.
  456. void ClearStack() { stack_.Clear(); }
  457. // clear stack on any exit from ParseStream, e.g. due to exception
  458. struct ClearStackOnExit {
  459. explicit ClearStackOnExit(GenericReader& r) : r_(r) {}
  460. ~ClearStackOnExit() { r_.ClearStack(); }
  461. private:
  462. GenericReader& r_;
  463. ClearStackOnExit(const ClearStackOnExit&);
  464. ClearStackOnExit& operator=(const ClearStackOnExit&);
  465. };
  466. template<unsigned parseFlags, typename InputStream>
  467. void SkipWhitespaceAndComments(InputStream& is) {
  468. SkipWhitespace(is);
  469. if (parseFlags & kParseCommentsFlag) {
  470. while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) {
  471. if (Consume(is, '*')) {
  472. while (true) {
  473. if (RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
  474. RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
  475. else if (Consume(is, '*')) {
  476. if (Consume(is, '/'))
  477. break;
  478. }
  479. else
  480. is.Take();
  481. }
  482. }
  483. else if (RAPIDJSON_LIKELY(Consume(is, '/')))
  484. while (is.Peek() != '\0' && is.Take() != '\n');
  485. else
  486. RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
  487. SkipWhitespace(is);
  488. }
  489. }
  490. }
  491. // Parse object: { string : value, ... }
  492. template<unsigned parseFlags, typename InputStream, typename Handler>
  493. void ParseObject(InputStream& is, Handler& handler) {
  494. RAPIDJSON_ASSERT(is.Peek() == '{');
  495. is.Take(); // Skip '{'
  496. if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
  497. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  498. SkipWhitespaceAndComments<parseFlags>(is);
  499. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  500. if (Consume(is, '}')) {
  501. if (RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object
  502. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  503. return;
  504. }
  505. for (SizeType memberCount = 0;;) {
  506. if (RAPIDJSON_UNLIKELY(is.Peek() != '"'))
  507. RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
  508. ParseString<parseFlags>(is, handler, true);
  509. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  510. SkipWhitespaceAndComments<parseFlags>(is);
  511. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  512. if (RAPIDJSON_UNLIKELY(!Consume(is, ':')))
  513. RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
  514. SkipWhitespaceAndComments<parseFlags>(is);
  515. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  516. ParseValue<parseFlags>(is, handler);
  517. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  518. SkipWhitespaceAndComments<parseFlags>(is);
  519. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  520. ++memberCount;
  521. switch (is.Peek()) {
  522. case ',':
  523. is.Take();
  524. SkipWhitespaceAndComments<parseFlags>(is);
  525. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  526. break;
  527. case '}':
  528. is.Take();
  529. if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
  530. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  531. return;
  532. default:
  533. RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy
  534. }
  535. if (parseFlags & kParseTrailingCommasFlag) {
  536. if (is.Peek() == '}') {
  537. if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
  538. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  539. is.Take();
  540. return;
  541. }
  542. }
  543. }
  544. }
  545. // Parse array: [ value, ... ]
  546. template<unsigned parseFlags, typename InputStream, typename Handler>
  547. void ParseArray(InputStream& is, Handler& handler) {
  548. RAPIDJSON_ASSERT(is.Peek() == '[');
  549. is.Take(); // Skip '['
  550. if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
  551. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  552. SkipWhitespaceAndComments<parseFlags>(is);
  553. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  554. if (Consume(is, ']')) {
  555. if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array
  556. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  557. return;
  558. }
  559. for (SizeType elementCount = 0;;) {
  560. ParseValue<parseFlags>(is, handler);
  561. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  562. ++elementCount;
  563. SkipWhitespaceAndComments<parseFlags>(is);
  564. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  565. if (Consume(is, ',')) {
  566. SkipWhitespaceAndComments<parseFlags>(is);
  567. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  568. }
  569. else if (Consume(is, ']')) {
  570. if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
  571. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  572. return;
  573. }
  574. else
  575. RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
  576. if (parseFlags & kParseTrailingCommasFlag) {
  577. if (is.Peek() == ']') {
  578. if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
  579. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  580. is.Take();
  581. return;
  582. }
  583. }
  584. }
  585. }
  586. template<unsigned parseFlags, typename InputStream, typename Handler>
  587. void ParseNull(InputStream& is, Handler& handler) {
  588. RAPIDJSON_ASSERT(is.Peek() == 'n');
  589. is.Take();
  590. if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) {
  591. if (RAPIDJSON_UNLIKELY(!handler.Null()))
  592. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  593. }
  594. else
  595. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
  596. }
  597. template<unsigned parseFlags, typename InputStream, typename Handler>
  598. void ParseTrue(InputStream& is, Handler& handler) {
  599. RAPIDJSON_ASSERT(is.Peek() == 't');
  600. is.Take();
  601. if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) {
  602. if (RAPIDJSON_UNLIKELY(!handler.Bool(true)))
  603. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  604. }
  605. else
  606. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
  607. }
  608. template<unsigned parseFlags, typename InputStream, typename Handler>
  609. void ParseFalse(InputStream& is, Handler& handler) {
  610. RAPIDJSON_ASSERT(is.Peek() == 'f');
  611. is.Take();
  612. if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) {
  613. if (RAPIDJSON_UNLIKELY(!handler.Bool(false)))
  614. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  615. }
  616. else
  617. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
  618. }
  619. template<typename InputStream>
  620. RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) {
  621. if (RAPIDJSON_LIKELY(is.Peek() == expect)) {
  622. is.Take();
  623. return true;
  624. }
  625. else
  626. return false;
  627. }
  628. // Helper function to parse four hexidecimal digits in \uXXXX in ParseString().
  629. template<typename InputStream>
  630. unsigned ParseHex4(InputStream& is, size_t escapeOffset) {
  631. unsigned codepoint = 0;
  632. for (int i = 0; i < 4; i++) {
  633. Ch c = is.Peek();
  634. codepoint <<= 4;
  635. codepoint += static_cast<unsigned>(c);
  636. if (c >= '0' && c <= '9')
  637. codepoint -= '0';
  638. else if (c >= 'A' && c <= 'F')
  639. codepoint -= 'A' - 10;
  640. else if (c >= 'a' && c <= 'f')
  641. codepoint -= 'a' - 10;
  642. else {
  643. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset);
  644. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
  645. }
  646. is.Take();
  647. }
  648. return codepoint;
  649. }
  650. template <typename CharType>
  651. class StackStream {
  652. public:
  653. typedef CharType Ch;
  654. StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {}
  655. RAPIDJSON_FORCEINLINE void Put(Ch c) {
  656. *stack_.template Push<Ch>() = c;
  657. ++length_;
  658. }
  659. RAPIDJSON_FORCEINLINE void* Push(SizeType count) {
  660. length_ += count;
  661. return stack_.template Push<Ch>(count);
  662. }
  663. size_t Length() const { return length_; }
  664. Ch* Pop() {
  665. return stack_.template Pop<Ch>(length_);
  666. }
  667. private:
  668. StackStream(const StackStream&);
  669. StackStream& operator=(const StackStream&);
  670. internal::Stack<StackAllocator>& stack_;
  671. SizeType length_;
  672. };
  673. // Parse string and generate String event. Different code paths for kParseInsituFlag.
  674. template<unsigned parseFlags, typename InputStream, typename Handler>
  675. void ParseString(InputStream& is, Handler& handler, bool isKey = false) {
  676. internal::StreamLocalCopy<InputStream> copy(is);
  677. InputStream& s(copy.s);
  678. RAPIDJSON_ASSERT(s.Peek() == '\"');
  679. s.Take(); // Skip '\"'
  680. bool success = false;
  681. if (parseFlags & kParseInsituFlag) {
  682. typename InputStream::Ch *head = s.PutBegin();
  683. ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
  684. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  685. size_t length = s.PutEnd(head) - 1;
  686. RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
  687. const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
  688. success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false));
  689. }
  690. else {
  691. StackStream<typename TargetEncoding::Ch> stackStream(stack_);
  692. ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream);
  693. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  694. SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;
  695. const typename TargetEncoding::Ch* const str = stackStream.Pop();
  696. success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true));
  697. }
  698. if (RAPIDJSON_UNLIKELY(!success))
  699. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
  700. }
  701. // Parse string to an output is
  702. // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation.
  703. template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream>
  704. RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) {
  705. //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
  706. #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
  707. static const char escape[256] = {
  708. Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/',
  709. Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
  710. 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
  711. 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  712. Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
  713. };
  714. #undef Z16
  715. //!@endcond
  716. for (;;) {
  717. // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation.
  718. if (!(parseFlags & kParseValidateEncodingFlag))
  719. ScanCopyUnescapedString(is, os);
  720. Ch c = is.Peek();
  721. if (RAPIDJSON_UNLIKELY(c == '\\')) { // Escape
  722. size_t escapeOffset = is.Tell(); // For invalid escaping, report the inital '\\' as error offset
  723. is.Take();
  724. Ch e = is.Peek();
  725. if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) {
  726. is.Take();
  727. os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)]));
  728. }
  729. else if (RAPIDJSON_LIKELY(e == 'u')) { // Unicode
  730. is.Take();
  731. unsigned codepoint = ParseHex4(is, escapeOffset);
  732. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  733. if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) {
  734. // Handle UTF-16 surrogate pair
  735. if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
  736. RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
  737. unsigned codepoint2 = ParseHex4(is, escapeOffset);
  738. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  739. if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
  740. RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
  741. codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
  742. }
  743. TEncoding::Encode(os, codepoint);
  744. }
  745. else
  746. RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset);
  747. }
  748. else if (RAPIDJSON_UNLIKELY(c == '"')) { // Closing double quote
  749. is.Take();
  750. os.Put('\0'); // null-terminate the string
  751. return;
  752. }
  753. else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
  754. if (c == '\0')
  755. RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell());
  756. else
  757. RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell());
  758. }
  759. else {
  760. size_t offset = is.Tell();
  761. if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ?
  762. !Transcoder<SEncoding, TEncoding>::Validate(is, os) :
  763. !Transcoder<SEncoding, TEncoding>::Transcode(is, os))))
  764. RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset);
  765. }
  766. }
  767. }
  768. template<typename InputStream, typename OutputStream>
  769. static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) {
  770. // Do nothing for generic version
  771. }
  772. #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
  773. // StringStream -> StackStream<char>
  774. static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
  775. const char* p = is.src_;
  776. // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
  777. const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
  778. while (p != nextAligned)
  779. if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
  780. is.src_ = p;
  781. return;
  782. }
  783. else
  784. os.Put(*p++);
  785. // The rest of string using SIMD
  786. static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
  787. static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
  788. static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
  789. const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
  790. const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
  791. const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
  792. for (;; p += 16) {
  793. const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
  794. const __m128i t1 = _mm_cmpeq_epi8(s, dq);
  795. const __m128i t2 = _mm_cmpeq_epi8(s, bs);
  796. const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
  797. const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
  798. unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
  799. if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
  800. SizeType length;
  801. #ifdef _MSC_VER // Find the index of first escaped
  802. unsigned long offset;
  803. _BitScanForward(&offset, r);
  804. length = offset;
  805. #else
  806. length = static_cast<SizeType>(__builtin_ffs(r) - 1);
  807. #endif
  808. char* q = reinterpret_cast<char*>(os.Push(length));
  809. for (size_t i = 0; i < length; i++)
  810. q[i] = p[i];
  811. p += length;
  812. break;
  813. }
  814. _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s);
  815. }
  816. is.src_ = p;
  817. }
  818. // InsituStringStream -> InsituStringStream
  819. static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
  820. RAPIDJSON_ASSERT(&is == &os);
  821. (void)os;
  822. if (is.src_ == is.dst_) {
  823. SkipUnescapedString(is);
  824. return;
  825. }
  826. char* p = is.src_;
  827. char *q = is.dst_;
  828. // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
  829. const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
  830. while (p != nextAligned)
  831. if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
  832. is.src_ = p;
  833. is.dst_ = q;
  834. return;
  835. }
  836. else
  837. *q++ = *p++;
  838. // The rest of string using SIMD
  839. static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
  840. static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
  841. static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
  842. const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
  843. const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
  844. const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
  845. for (;; p += 16, q += 16) {
  846. const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
  847. const __m128i t1 = _mm_cmpeq_epi8(s, dq);
  848. const __m128i t2 = _mm_cmpeq_epi8(s, bs);
  849. const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
  850. const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
  851. unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
  852. if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
  853. size_t length;
  854. #ifdef _MSC_VER // Find the index of first escaped
  855. unsigned long offset;
  856. _BitScanForward(&offset, r);
  857. length = offset;
  858. #else
  859. length = static_cast<size_t>(__builtin_ffs(r) - 1);
  860. #endif
  861. for (const char* pend = p + length; p != pend; )
  862. *q++ = *p++;
  863. break;
  864. }
  865. _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s);
  866. }
  867. is.src_ = p;
  868. is.dst_ = q;
  869. }
  870. // When read/write pointers are the same for insitu stream, just skip unescaped characters
  871. static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
  872. RAPIDJSON_ASSERT(is.src_ == is.dst_);
  873. char* p = is.src_;
  874. // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
  875. const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
  876. for (; p != nextAligned; p++)
  877. if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
  878. is.src_ = is.dst_ = p;
  879. return;
  880. }
  881. // The rest of string using SIMD
  882. static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
  883. static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
  884. static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
  885. const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
  886. const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
  887. const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
  888. for (;; p += 16) {
  889. const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
  890. const __m128i t1 = _mm_cmpeq_epi8(s, dq);
  891. const __m128i t2 = _mm_cmpeq_epi8(s, bs);
  892. const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
  893. const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
  894. unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
  895. if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
  896. size_t length;
  897. #ifdef _MSC_VER // Find the index of first escaped
  898. unsigned long offset;
  899. _BitScanForward(&offset, r);
  900. length = offset;
  901. #else
  902. length = static_cast<size_t>(__builtin_ffs(r) - 1);
  903. #endif
  904. p += length;
  905. break;
  906. }
  907. }
  908. is.src_ = is.dst_ = p;
  909. }
  910. #endif
  911. template<typename InputStream, bool backup, bool pushOnTake>
  912. class NumberStream;
  913. template<typename InputStream>
  914. class NumberStream<InputStream, false, false> {
  915. public:
  916. typedef typename InputStream::Ch Ch;
  917. NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; }
  918. ~NumberStream() {}
  919. RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
  920. RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
  921. RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
  922. RAPIDJSON_FORCEINLINE void Push(char) {}
  923. size_t Tell() { return is.Tell(); }
  924. size_t Length() { return 0; }
  925. const char* Pop() { return 0; }
  926. protected:
  927. NumberStream& operator=(const NumberStream&);
  928. InputStream& is;
  929. };
  930. template<typename InputStream>
  931. class NumberStream<InputStream, true, false> : public NumberStream<InputStream, false, false> {
  932. typedef NumberStream<InputStream, false, false> Base;
  933. public:
  934. NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {}
  935. ~NumberStream() {}
  936. RAPIDJSON_FORCEINLINE Ch TakePush() {
  937. stackStream.Put(static_cast<char>(Base::is.Peek()));
  938. return Base::is.Take();
  939. }
  940. RAPIDJSON_FORCEINLINE void Push(char c) {
  941. stackStream.Put(c);
  942. }
  943. size_t Length() { return stackStream.Length(); }
  944. const char* Pop() {
  945. stackStream.Put('\0');
  946. return stackStream.Pop();
  947. }
  948. private:
  949. StackStream<char> stackStream;
  950. };
  951. template<typename InputStream>
  952. class NumberStream<InputStream, true, true> : public NumberStream<InputStream, true, false> {
  953. typedef NumberStream<InputStream, true, false> Base;
  954. public:
  955. NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {}
  956. ~NumberStream() {}
  957. RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); }
  958. };
  959. template<unsigned parseFlags, typename InputStream, typename Handler>
  960. void ParseNumber(InputStream& is, Handler& handler) {
  961. internal::StreamLocalCopy<InputStream> copy(is);
  962. NumberStream<InputStream,
  963. ((parseFlags & kParseNumbersAsStringsFlag) != 0) ?
  964. ((parseFlags & kParseInsituFlag) == 0) :
  965. ((parseFlags & kParseFullPrecisionFlag) != 0),
  966. (parseFlags & kParseNumbersAsStringsFlag) != 0 &&
  967. (parseFlags & kParseInsituFlag) == 0> s(*this, copy.s);
  968. size_t startOffset = s.Tell();
  969. double d = 0.0;
  970. bool useNanOrInf = false;
  971. // Parse minus
  972. bool minus = Consume(s, '-');
  973. // Parse int: zero / ( digit1-9 *DIGIT )
  974. unsigned i = 0;
  975. uint64_t i64 = 0;
  976. bool use64bit = false;
  977. int significandDigit = 0;
  978. if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) {
  979. i = 0;
  980. s.TakePush();
  981. }
  982. else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) {
  983. i = static_cast<unsigned>(s.TakePush() - '0');
  984. if (minus)
  985. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  986. if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648
  987. if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) {
  988. i64 = i;
  989. use64bit = true;
  990. break;
  991. }
  992. }
  993. i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
  994. significandDigit++;
  995. }
  996. else
  997. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  998. if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295
  999. if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) {
  1000. i64 = i;
  1001. use64bit = true;
  1002. break;
  1003. }
  1004. }
  1005. i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
  1006. significandDigit++;
  1007. }
  1008. }
  1009. // Parse NaN or Infinity here
  1010. else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) {
  1011. useNanOrInf = true;
  1012. if (RAPIDJSON_LIKELY(Consume(s, 'N') && Consume(s, 'a') && Consume(s, 'N'))) {
  1013. d = std::numeric_limits<double>::quiet_NaN();
  1014. }
  1015. else if (RAPIDJSON_LIKELY(Consume(s, 'I') && Consume(s, 'n') && Consume(s, 'f'))) {
  1016. d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity());
  1017. if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n')
  1018. && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y'))))
  1019. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
  1020. }
  1021. else
  1022. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
  1023. }
  1024. else
  1025. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
  1026. // Parse 64bit int
  1027. bool useDouble = false;
  1028. if (use64bit) {
  1029. if (minus)
  1030. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1031. if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808
  1032. if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) {
  1033. d = static_cast<double>(i64);
  1034. useDouble = true;
  1035. break;
  1036. }
  1037. i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
  1038. significandDigit++;
  1039. }
  1040. else
  1041. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1042. if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615
  1043. if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) {
  1044. d = static_cast<double>(i64);
  1045. useDouble = true;
  1046. break;
  1047. }
  1048. i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
  1049. significandDigit++;
  1050. }
  1051. }
  1052. // Force double for big integer
  1053. if (useDouble) {
  1054. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1055. if (RAPIDJSON_UNLIKELY(d >= 1.7976931348623157e307)) // DBL_MAX / 10.0
  1056. RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
  1057. d = d * 10 + (s.TakePush() - '0');
  1058. }
  1059. }
  1060. // Parse frac = decimal-point 1*DIGIT
  1061. int expFrac = 0;
  1062. size_t decimalPosition;
  1063. if (Consume(s, '.')) {
  1064. decimalPosition = s.Length();
  1065. if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9')))
  1066. RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell());
  1067. if (!useDouble) {
  1068. #if RAPIDJSON_64BIT
  1069. // Use i64 to store significand in 64-bit architecture
  1070. if (!use64bit)
  1071. i64 = i;
  1072. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1073. if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
  1074. break;
  1075. else {
  1076. i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
  1077. --expFrac;
  1078. if (i64 != 0)
  1079. significandDigit++;
  1080. }
  1081. }
  1082. d = static_cast<double>(i64);
  1083. #else
  1084. // Use double to store significand in 32-bit architecture
  1085. d = static_cast<double>(use64bit ? i64 : i);
  1086. #endif
  1087. useDouble = true;
  1088. }
  1089. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1090. if (significandDigit < 17) {
  1091. d = d * 10.0 + (s.TakePush() - '0');
  1092. --expFrac;
  1093. if (RAPIDJSON_LIKELY(d > 0.0))
  1094. significandDigit++;
  1095. }
  1096. else
  1097. s.TakePush();
  1098. }
  1099. }
  1100. else
  1101. decimalPosition = s.Length(); // decimal position at the end of integer.
  1102. // Parse exp = e [ minus / plus ] 1*DIGIT
  1103. int exp = 0;
  1104. if (Consume(s, 'e') || Consume(s, 'E')) {
  1105. if (!useDouble) {
  1106. d = static_cast<double>(use64bit ? i64 : i);
  1107. useDouble = true;
  1108. }
  1109. bool expMinus = false;
  1110. if (Consume(s, '+'))
  1111. ;
  1112. else if (Consume(s, '-'))
  1113. expMinus = true;
  1114. if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1115. exp = static_cast<int>(s.Take() - '0');
  1116. if (expMinus) {
  1117. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1118. exp = exp * 10 + static_cast<int>(s.Take() - '0');
  1119. if (exp >= 214748364) { // Issue #313: prevent overflow exponent
  1120. while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9')) // Consume the rest of exponent
  1121. s.Take();
  1122. }
  1123. }
  1124. }
  1125. else { // positive exp
  1126. int maxExp = 308 - expFrac;
  1127. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1128. exp = exp * 10 + static_cast<int>(s.Take() - '0');
  1129. if (RAPIDJSON_UNLIKELY(exp > maxExp))
  1130. RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
  1131. }
  1132. }
  1133. }
  1134. else
  1135. RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell());
  1136. if (expMinus)
  1137. exp = -exp;
  1138. }
  1139. // Finish parsing, call event according to the type of number.
  1140. bool cont = true;
  1141. if (parseFlags & kParseNumbersAsStringsFlag) {
  1142. if (parseFlags & kParseInsituFlag) {
  1143. s.Pop(); // Pop stack no matter if it will be used or not.
  1144. typename InputStream::Ch* head = is.PutBegin();
  1145. const size_t length = s.Tell() - startOffset;
  1146. RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
  1147. // unable to insert the \0 character here, it will erase the comma after this number
  1148. const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
  1149. cont = handler.RawNumber(str, SizeType(length), false);
  1150. }
  1151. else {
  1152. SizeType numCharsToCopy = static_cast<SizeType>(s.Length());
  1153. StringStream srcStream(s.Pop());
  1154. StackStream<typename TargetEncoding::Ch> dstStream(stack_);
  1155. while (numCharsToCopy--) {
  1156. Transcoder<UTF8<>, TargetEncoding>::Transcode(srcStream, dstStream);
  1157. }
  1158. dstStream.Put('\0');
  1159. const typename TargetEncoding::Ch* str = dstStream.Pop();
  1160. const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1;
  1161. cont = handler.RawNumber(str, SizeType(length), true);
  1162. }
  1163. }
  1164. else {
  1165. size_t length = s.Length();
  1166. const char* decimal = s.Pop(); // Pop stack no matter if it will be used or not.
  1167. if (useDouble) {
  1168. int p = exp + expFrac;
  1169. if (parseFlags & kParseFullPrecisionFlag)
  1170. d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
  1171. else
  1172. d = internal::StrtodNormalPrecision(d, p);
  1173. cont = handler.Double(minus ? -d : d);
  1174. }
  1175. else if (useNanOrInf) {
  1176. cont = handler.Double(d);
  1177. }
  1178. else {
  1179. if (use64bit) {
  1180. if (minus)
  1181. cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
  1182. else
  1183. cont = handler.Uint64(i64);
  1184. }
  1185. else {
  1186. if (minus)
  1187. cont = handler.Int(static_cast<int32_t>(~i + 1));
  1188. else
  1189. cont = handler.Uint(i);
  1190. }
  1191. }
  1192. }
  1193. if (RAPIDJSON_UNLIKELY(!cont))
  1194. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset);
  1195. }
  1196. // Parse any JSON value
  1197. template<unsigned parseFlags, typename InputStream, typename Handler>
  1198. void ParseValue(InputStream& is, Handler& handler) {
  1199. switch (is.Peek()) {
  1200. case 'n': ParseNull <parseFlags>(is, handler); break;
  1201. case 't': ParseTrue <parseFlags>(is, handler); break;
  1202. case 'f': ParseFalse <parseFlags>(is, handler); break;
  1203. case '"': ParseString<parseFlags>(is, handler); break;
  1204. case '{': ParseObject<parseFlags>(is, handler); break;
  1205. case '[': ParseArray <parseFlags>(is, handler); break;
  1206. default :
  1207. ParseNumber<parseFlags>(is, handler);
  1208. break;
  1209. }
  1210. }
  // Iterative Parsing

  // States of the iterative parser. The enumerator values are used as the first
  // index of the transition table in Predict(), so their order must not change.
  enum IterativeParsingState {
      IterativeParsingStartState = 0,
      IterativeParsingFinishState,
      IterativeParsingErrorState,

      // Object states
      IterativeParsingObjectInitialState,
      IterativeParsingMemberKeyState,
      IterativeParsingKeyValueDelimiterState,
      IterativeParsingMemberValueState,
      IterativeParsingMemberDelimiterState,
      IterativeParsingObjectFinishState,

      // Array states
      IterativeParsingArrayInitialState,
      IterativeParsingElementState,
      IterativeParsingElementDelimiterState,
      IterativeParsingArrayFinishState,

      // Single value state
      IterativeParsingValueState
  };

  // Number of states; IterativeParsingValueState must stay the last enumerator.
  enum { cIterativeParsingStateCount = IterativeParsingValueState + 1 };
  // Token classes produced by Tokenize(). The enumerator values are used as the
  // second index of the transition table in Predict(), so their order must not change.
  enum Token {
      LeftBracketToken = 0,       // '['
      RightBracketToken,          // ']'

      LeftCurlyBracketToken,      // '{'
      RightCurlyBracketToken,     // '}'

      CommaToken,                 // ','
      ColonToken,                 // ':'

      StringToken,                // '"'
      FalseToken,                 // 'f'
      TrueToken,                  // 't'
      NullToken,                  // 'n'
      NumberToken,                // every other character

      kTokenCount                 // number of token classes
  };
  1248. RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) {
  1249. //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
  1250. #define N NumberToken
  1251. #define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
  1252. // Maps from ASCII to Token
  1253. static const unsigned char tokenMap[256] = {
  1254. N16, // 00~0F
  1255. N16, // 10~1F
  1256. N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
  1257. N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
  1258. N16, // 40~4F
  1259. N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
  1260. N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
  1261. N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
  1262. N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
  1263. };
  1264. #undef N
  1265. #undef N16
  1266. //!@endcond
  1267. if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256)
  1268. return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]);
  1269. else
  1270. return NumberToken;
  1271. }
  1272. RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) {
  1273. // current state x one lookahead token -> new state
  1274. static const char G[cIterativeParsingStateCount][kTokenCount] = {
  1275. // Start
  1276. {
  1277. IterativeParsingArrayInitialState, // Left bracket
  1278. IterativeParsingErrorState, // Right bracket
  1279. IterativeParsingObjectInitialState, // Left curly bracket
  1280. IterativeParsingErrorState, // Right curly bracket
  1281. IterativeParsingErrorState, // Comma
  1282. IterativeParsingErrorState, // Colon
  1283. IterativeParsingValueState, // String
  1284. IterativeParsingValueState, // False
  1285. IterativeParsingValueState, // True
  1286. IterativeParsingValueState, // Null
  1287. IterativeParsingValueState // Number
  1288. },
  1289. // Finish(sink state)
  1290. {
  1291. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1292. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1293. IterativeParsingErrorState
  1294. },
  1295. // Error(sink state)
  1296. {
  1297. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1298. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1299. IterativeParsingErrorState
  1300. },
  1301. // ObjectInitial
  1302. {
  1303. IterativeParsingErrorState, // Left bracket
  1304. IterativeParsingErrorState, // Right bracket
  1305. IterativeParsingErrorState, // Left curly bracket
  1306. IterativeParsingObjectFinishState, // Right curly bracket
  1307. IterativeParsingErrorState, // Comma
  1308. IterativeParsingErrorState, // Colon
  1309. IterativeParsingMemberKeyState, // String
  1310. IterativeParsingErrorState, // False
  1311. IterativeParsingErrorState, // True
  1312. IterativeParsingErrorState, // Null
  1313. IterativeParsingErrorState // Number
  1314. },
  1315. // MemberKey
  1316. {
  1317. IterativeParsingErrorState, // Left bracket
  1318. IterativeParsingErrorState, // Right bracket
  1319. IterativeParsingErrorState, // Left curly bracket
  1320. IterativeParsingErrorState, // Right curly bracket
  1321. IterativeParsingErrorState, // Comma
  1322. IterativeParsingKeyValueDelimiterState, // Colon
  1323. IterativeParsingErrorState, // String
  1324. IterativeParsingErrorState, // False
  1325. IterativeParsingErrorState, // True
  1326. IterativeParsingErrorState, // Null
  1327. IterativeParsingErrorState // Number
  1328. },
  1329. // KeyValueDelimiter
  1330. {
  1331. IterativeParsingArrayInitialState, // Left bracket(push MemberValue state)
  1332. IterativeParsingErrorState, // Right bracket
  1333. IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state)
  1334. IterativeParsingErrorState, // Right curly bracket
  1335. IterativeParsingErrorState, // Comma
  1336. IterativeParsingErrorState, // Colon
  1337. IterativeParsingMemberValueState, // String
  1338. IterativeParsingMemberValueState, // False
  1339. IterativeParsingMemberValueState, // True
  1340. IterativeParsingMemberValueState, // Null
  1341. IterativeParsingMemberValueState // Number
  1342. },
  1343. // MemberValue
  1344. {
  1345. IterativeParsingErrorState, // Left bracket
  1346. IterativeParsingErrorState, // Right bracket
  1347. IterativeParsingErrorState, // Left curly bracket
  1348. IterativeParsingObjectFinishState, // Right curly bracket
  1349. IterativeParsingMemberDelimiterState, // Comma
  1350. IterativeParsingErrorState, // Colon
  1351. IterativeParsingErrorState, // String
  1352. IterativeParsingErrorState, // False
  1353. IterativeParsingErrorState, // True
  1354. IterativeParsingErrorState, // Null
  1355. IterativeParsingErrorState // Number
  1356. },
  1357. // MemberDelimiter
  1358. {
  1359. IterativeParsingErrorState, // Left bracket
  1360. IterativeParsingErrorState, // Right bracket
  1361. IterativeParsingErrorState, // Left curly bracket
  1362. IterativeParsingObjectFinishState, // Right curly bracket
  1363. IterativeParsingErrorState, // Comma
  1364. IterativeParsingErrorState, // Colon
  1365. IterativeParsingMemberKeyState, // String
  1366. IterativeParsingErrorState, // False
  1367. IterativeParsingErrorState, // True
  1368. IterativeParsingErrorState, // Null
  1369. IterativeParsingErrorState // Number
  1370. },
  1371. // ObjectFinish(sink state)
  1372. {
  1373. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1374. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1375. IterativeParsingErrorState
  1376. },
  1377. // ArrayInitial
  1378. {
  1379. IterativeParsingArrayInitialState, // Left bracket(push Element state)
  1380. IterativeParsingArrayFinishState, // Right bracket
  1381. IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
  1382. IterativeParsingErrorState, // Right curly bracket
  1383. IterativeParsingErrorState, // Comma
  1384. IterativeParsingErrorState, // Colon
  1385. IterativeParsingElementState, // String
  1386. IterativeParsingElementState, // False
  1387. IterativeParsingElementState, // True
  1388. IterativeParsingElementState, // Null
  1389. IterativeParsingElementState // Number
  1390. },
  1391. // Element
  1392. {
  1393. IterativeParsingErrorState, // Left bracket
  1394. IterativeParsingArrayFinishState, // Right bracket
  1395. IterativeParsingErrorState, // Left curly bracket
  1396. IterativeParsingErrorState, // Right curly bracket
  1397. IterativeParsingElementDelimiterState, // Comma
  1398. IterativeParsingErrorState, // Colon
  1399. IterativeParsingErrorState, // String
  1400. IterativeParsingErrorState, // False
  1401. IterativeParsingErrorState, // True
  1402. IterativeParsingErrorState, // Null
  1403. IterativeParsingErrorState // Number
  1404. },
  1405. // ElementDelimiter
  1406. {
  1407. IterativeParsingArrayInitialState, // Left bracket(push Element state)
  1408. IterativeParsingArrayFinishState, // Right bracket
  1409. IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
  1410. IterativeParsingErrorState, // Right curly bracket
  1411. IterativeParsingErrorState, // Comma
  1412. IterativeParsingErrorState, // Colon
  1413. IterativeParsingElementState, // String
  1414. IterativeParsingElementState, // False
  1415. IterativeParsingElementState, // True
  1416. IterativeParsingElementState, // Null
  1417. IterativeParsingElementState // Number
  1418. },
  1419. // ArrayFinish(sink state)
  1420. {
  1421. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1422. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1423. IterativeParsingErrorState
  1424. },
  1425. // Single Value (sink state)
  1426. {
  1427. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1428. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1429. IterativeParsingErrorState
  1430. }
  1431. }; // End of G
  1432. return static_cast<IterativeParsingState>(G[state][token]);
  1433. }
  1434. // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit().
  1435. // May return a new state on state pop.
  1436. template <unsigned parseFlags, typename InputStream, typename Handler>
  1437. RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
  1438. (void)token;
  1439. switch (dst) {
  1440. case IterativeParsingErrorState:
  1441. return dst;
  1442. case IterativeParsingObjectInitialState:
  1443. case IterativeParsingArrayInitialState:
  1444. {
  1445. // Push the state(Element or MemeberValue) if we are nested in another array or value of member.
  1446. // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
  1447. IterativeParsingState n = src;
  1448. if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
  1449. n = IterativeParsingElementState;
  1450. else if (src == IterativeParsingKeyValueDelimiterState)
  1451. n = IterativeParsingMemberValueState;
  1452. // Push current state.
  1453. *stack_.template Push<SizeType>(1) = n;
  1454. // Initialize and push the member/element count.
  1455. *stack_.template Push<SizeType>(1) = 0;
  1456. // Call handler
  1457. bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
  1458. // On handler short circuits the parsing.
  1459. if (!hr) {
  1460. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
  1461. return IterativeParsingErrorState;
  1462. }
  1463. else {
  1464. is.Take();
  1465. return dst;
  1466. }
  1467. }
  1468. case IterativeParsingMemberKeyState:
  1469. ParseString<parseFlags>(is, handler, true);
  1470. if (HasParseError())
  1471. return IterativeParsingErrorState;
  1472. else
  1473. return dst;
  1474. case IterativeParsingKeyValueDelimiterState:
  1475. RAPIDJSON_ASSERT(token == ColonToken);
  1476. is.Take();
  1477. return dst;
  1478. case IterativeParsingMemberValueState:
  1479. // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
  1480. ParseValue<parseFlags>(is, handler);
  1481. if (HasParseError()) {
  1482. return IterativeParsingErrorState;
  1483. }
  1484. return dst;
  1485. case IterativeParsingElementState:
  1486. // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
  1487. ParseValue<parseFlags>(is, handler);
  1488. if (HasParseError()) {
  1489. return IterativeParsingErrorState;
  1490. }
  1491. return dst;
  1492. case IterativeParsingMemberDelimiterState:
  1493. case IterativeParsingElementDelimiterState:
  1494. is.Take();
  1495. // Update member/element count.
  1496. *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
  1497. return dst;
  1498. case IterativeParsingObjectFinishState:
  1499. {
  1500. // Transit from delimiter is only allowed when trailing commas are enabled
  1501. if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) {
  1502. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell());
  1503. return IterativeParsingErrorState;
  1504. }
  1505. // Get member count.
  1506. SizeType c = *stack_.template Pop<SizeType>(1);
  1507. // If the object is not empty, count the last member.
  1508. if (src == IterativeParsingMemberValueState)
  1509. ++c;
  1510. // Restore the state.
  1511. IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
  1512. // Transit to Finish state if this is the topmost scope.
  1513. if (n == IterativeParsingStartState)
  1514. n = IterativeParsingFinishState;
  1515. // Call handler
  1516. bool hr = handler.EndObject(c);
  1517. // On handler short circuits the parsing.
  1518. if (!hr) {
  1519. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
  1520. return IterativeParsingErrorState;
  1521. }
  1522. else {
  1523. is.Take();
  1524. return n;
  1525. }
  1526. }
  1527. case IterativeParsingArrayFinishState:
  1528. {
  1529. // Transit from delimiter is only allowed when trailing commas are enabled
  1530. if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) {
  1531. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell());
  1532. return IterativeParsingErrorState;
  1533. }
  1534. // Get element count.
  1535. SizeType c = *stack_.template Pop<SizeType>(1);
  1536. // If the array is not empty, count the last element.
  1537. if (src == IterativeParsingElementState)
  1538. ++c;
  1539. // Restore the state.
  1540. IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
  1541. // Transit to Finish state if this is the topmost scope.
  1542. if (n == IterativeParsingStartState)
  1543. n = IterativeParsingFinishState;
  1544. // Call handler
  1545. bool hr = handler.EndArray(c);
  1546. // On handler short circuits the parsing.
  1547. if (!hr) {
  1548. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
  1549. return IterativeParsingErrorState;
  1550. }
  1551. else {
  1552. is.Take();
  1553. return n;
  1554. }
  1555. }
  1556. default:
  1557. // This branch is for IterativeParsingValueState actually.
  1558. // Use `default:` rather than
  1559. // `case IterativeParsingValueState:` is for code coverage.
  1560. // The IterativeParsingStartState is not enumerated in this switch-case.
  1561. // It is impossible for that case. And it can be caught by following assertion.
  1562. // The IterativeParsingFinishState is not enumerated in this switch-case either.
  1563. // It is a "derivative" state which cannot triggered from Predict() directly.
  1564. // Therefore it cannot happen here. And it can be caught by following assertion.
  1565. RAPIDJSON_ASSERT(dst == IterativeParsingValueState);
  1566. // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
  1567. ParseValue<parseFlags>(is, handler);
  1568. if (HasParseError()) {
  1569. return IterativeParsingErrorState;
  1570. }
  1571. return IterativeParsingFinishState;
  1572. }
  1573. }
  1574. template <typename InputStream>
  1575. void HandleError(IterativeParsingState src, InputStream& is) {
  1576. if (HasParseError()) {
  1577. // Error flag has been set.
  1578. return;
  1579. }
  1580. switch (src) {
  1581. case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
  1582. case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
  1583. case IterativeParsingObjectInitialState:
  1584. case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
  1585. case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
  1586. case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
  1587. case IterativeParsingKeyValueDelimiterState:
  1588. case IterativeParsingArrayInitialState:
  1589. case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return;
  1590. default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
  1591. }
  1592. }
  1593. template <unsigned parseFlags, typename InputStream, typename Handler>
  1594. ParseResult IterativeParse(InputStream& is, Handler& handler) {
  1595. parseResult_.Clear();
  1596. ClearStackOnExit scope(*this);
  1597. IterativeParsingState state = IterativeParsingStartState;
  1598. SkipWhitespaceAndComments<parseFlags>(is);
  1599. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  1600. while (is.Peek() != '\0') {
  1601. Token t = Tokenize(is.Peek());
  1602. IterativeParsingState n = Predict(state, t);
  1603. IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
  1604. if (d == IterativeParsingErrorState) {
  1605. HandleError(state, is);
  1606. break;
  1607. }
  1608. state = d;
  1609. // Do not further consume streams if a root JSON has been parsed.
  1610. if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
  1611. break;
  1612. SkipWhitespaceAndComments<parseFlags>(is);
  1613. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  1614. }
  1615. // Handle the end of file.
  1616. if (state != IterativeParsingFinishState)
  1617. HandleError(state, is);
  1618. return parseResult_;
  1619. }
  1620. static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string.
  1621. internal::Stack<StackAllocator> stack_; //!< Working stack: stores decoded strings temporarily during non-destructive parsing; the iterative parser (Transit) also pushes the saved state and member/element count of each open object/array here.
  1622. ParseResult parseResult_; //!< Parse result; cleared at the start of IterativeParse() and returned by it.
  1623. }; // class GenericReader
  1624. //! Reader with UTF8 encoding and default allocator: GenericReader with UTF8<> passed for both encoding parameters and any remaining template parameters left at their defaults.
  1625. typedef GenericReader<UTF8<>, UTF8<> > Reader;
  1626. RAPIDJSON_NAMESPACE_END
  1627. #ifdef __clang__
  1628. RAPIDJSON_DIAG_POP
  1629. #endif
  1630. #ifdef __GNUC__
  1631. RAPIDJSON_DIAG_POP
  1632. #endif
  1633. #ifdef _MSC_VER
  1634. RAPIDJSON_DIAG_POP
  1635. #endif
  1636. #endif // RAPIDJSON_READER_H_