From ae44478b30d890fe0fb04022f44d474dcdcc3f9d Mon Sep 17 00:00:00 2001 From: Lassi Pulkkinen Date: Thu, 31 Oct 2024 03:11:21 +0200 Subject: Initial commit (import old repo) --- encoding/json/+test/lexer.ha | 62 ++++++ encoding/json/+test/test_load.ha | 164 +++++++++++++++ encoding/json/+test/test_value.ha | 49 +++++ encoding/json/COPYING | 367 +++++++++++++++++++++++++++++++++ encoding/json/README | 15 ++ encoding/json/dump.ha | 81 ++++++++ encoding/json/lex.ha | 417 ++++++++++++++++++++++++++++++++++++++ encoding/json/load.ha | 148 ++++++++++++++ encoding/json/path/path.ha | 26 +++ encoding/json/types.ha | 50 +++++ encoding/json/value.ha | 217 ++++++++++++++++++++ 11 files changed, 1596 insertions(+) create mode 100644 encoding/json/+test/lexer.ha create mode 100644 encoding/json/+test/test_load.ha create mode 100644 encoding/json/+test/test_value.ha create mode 100644 encoding/json/COPYING create mode 100644 encoding/json/README create mode 100644 encoding/json/dump.ha create mode 100644 encoding/json/lex.ha create mode 100644 encoding/json/load.ha create mode 100644 encoding/json/path/path.ha create mode 100644 encoding/json/types.ha create mode 100644 encoding/json/value.ha (limited to 'encoding') diff --git a/encoding/json/+test/lexer.ha b/encoding/json/+test/lexer.ha new file mode 100644 index 0000000..b4c098e --- /dev/null +++ b/encoding/json/+test/lexer.ha @@ -0,0 +1,62 @@ +use io; +use memio; +use strings; + +@test fn lex() void = { + const cases: [_](str, []token) = [ + ("true", [true]), + ("false", [false]), + ("null", [_null]), + ("1234", [1234.0]), + ("12.34", [12.34]), + ("12.34e5", [12.34e5]), + ("12.34E5", [12.34e5]), + ("12.34e+5", [12.34e5]), + ("12.34e-5", [12.34e-5]), + ("12e5", [12.0e5]), + ("-1234", [-1234.0]), + (`"hello world"`, ["hello world"]), + (`"\"\\\/\b\f\n\r\t\u0020"`, ["\"\\/\b\f\n\r\t\u0020"]), + ("[ null, null ]", [arraystart, _null, comma, _null, arrayend]), + ]; + + for (let i = 0z; i < len(cases); i += 1) { + const src = 
strings::toutf8(cases[i].0);
+		const src = memio::fixed(src);
+		const lexer = newlexer(&src);
+		defer close(&lexer);
+
+		for (let j = 0z; j < len(cases[i].1); j += 1) {
+			const want = cases[i].1[j];
+			const have = lex(&lexer)! as token;
+			assert(tokeq(want, have));
+		};
+
+		// After the expected tokens, the lexer must report end of input.
+		assert(lex(&lexer) is io::EOF);
+	};
+};
+
+// Test helper: reports whether the lexed token "have" matches the expected
+// token "want". Punctuation tokens and _null are compared by type only; bool,
+// f64 and str tokens are compared by value. For the value-carrying cases
+// "have" is converted with a type assertion ("as"), so a token of a different
+// type trips that assertion rather than yielding false.
+fn tokeq(want: token, have: token) bool = {
+	match (want) {
+	case _null =>
+		return have is _null;
+	case comma =>
+		return have is comma;
+	case colon =>
+		return have is colon;
+	case arraystart =>
+		return have is arraystart;
+	case arrayend =>
+		return have is arrayend;
+	case objstart =>
+		return have is objstart;
+	case objend =>
+		return have is objend;
+	case let b: bool =>
+		return have as bool == b;
+	case let f: f64 =>
+		return have as f64 == f;
+	case let s: str =>
+		return have as str == s;
+	};
+};
diff --git a/encoding/json/+test/test_load.ha b/encoding/json/+test/test_load.ha
new file mode 100644
index 0000000..bf53777
--- /dev/null
+++ b/encoding/json/+test/test_load.ha
@@ -0,0 +1,164 @@
+use fmt;
+
+// Test helper: loads "input" and checks that the result equals "expected",
+// then dumps the loaded value back to a string and checks that loading that
+// string yields "expected" as well.
+fn roundtrip(input: str, expected: value) void = {
+	const val = loadstr(input)!;
+	defer finish(val);
+	assert(equal(val, expected));
+	const s = dumpstr(val);
+	defer free(s);
+	const val = loadstr(s)!;
+	defer finish(val);
+	assert(equal(val, expected));
+};
+
+// Test helper: asserts that loading "input" fails with an [[invalid]] error
+// whose location equals "expected_loc". On a mismatch the input and both
+// locations are printed to the error output before aborting.
+fn errassert(input: str, expected_loc: (uint, uint)) void = {
+	const loc = loadstr(input) as invalid;
+	if (loc.0 != expected_loc.0 || loc.1 != expected_loc.1) {
+		fmt::errorfln("=== JSON:\n{}", input)!;
+		fmt::errorfln("=== expected error location:\n({}, {})",
+			expected_loc.0, expected_loc.1)!;
+		fmt::errorfln("=== actual error location:\n({}, {})",
+			loc.0, loc.1)!;
+		abort();
+	};
+};
+
+// Round-trips a set of valid documents and checks the reported error location
+// for a set of invalid ones.
+@test fn load() void = {
+	let obj = newobject();
+	defer finish(obj);
+	let obj2 = newobject();
+	defer finish(obj2);
+
+	roundtrip(`1234`, 1234.0);
+	roundtrip(`[]`, []);
+	roundtrip(`[1, 2, 3, null]`, [1.0, 2.0, 3.0, _null]);
+	roundtrip(`{}`, obj);
+	set(&obj, "hello", "world");
+	
set(&obj, "answer", 42.0); + roundtrip(`{ "hello": "world", "answer": 42 }`, obj); + reset(&obj); + roundtrip(`[[] ]`, [[]]); + roundtrip(`[""]`, [""]); + roundtrip(`["a"]`, ["a"]); + roundtrip(`[false]`, [false]); + roundtrip(`[null, 1, "1", {}]`, [_null, 1.0, "1", obj]); + roundtrip(`[null]`, [_null]); + roundtrip("[1\n]", [1.0]); + roundtrip(`[1,null,null,null,2]`, [1.0, _null, _null, _null, 2.0]); + set(&obj, "", 0.0); + roundtrip(`{"":0}`, obj); + reset(&obj); + set(&obj, "foo\0bar", 42.0); + roundtrip(`{"foo\u0000bar": 42}`, obj); + reset(&obj); + set(&obj, "min", -1.0e+28); + set(&obj, "max", 1.0e+28); + roundtrip(`{"min": -1.0e+28, "max": 1.0e+28}`, obj); + reset(&obj); + set(&obj, "id", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); + set(&obj2, "id", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); + set(&obj, "x", [obj2]); + roundtrip(`{"x":[{"id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}], "id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}`, obj); + reset(&obj); + reset(&obj2); + set(&obj, "a", []); + roundtrip(`{"a":[]}`, obj); + roundtrip("{\n" `"a": []` "\n}", obj); + reset(&obj); + roundtrip(`"\u0060\u012a\u12AB"`, "\u0060\u012a\u12AB"); + roundtrip(`"\"\\\/\b\f\n\r\t"`, "\"\\/\b\f\n\r\t"); + roundtrip(`"\\u0000"`, `\u0000`); + roundtrip(`"\""`, `"`); + roundtrip(`"a/*b*/c/*d//e"`, "a/*b*/c/*d//e"); + roundtrip(`"\\a"`, `\a`); + roundtrip(`"\\n"`, `\n`); + roundtrip(`"\u0012"`, "\u0012"); + roundtrip(`[ "asd"]`, ["asd"]); + roundtrip(`"new\u000Aline"`, "new\nline"); + roundtrip(`"\u0000"`, "\0"); + roundtrip(`"\u002c"`, "\u002c"); + roundtrip(`"asd "`, "asd "); + roundtrip(`" "`, " "); + roundtrip(`"\u0821"`, "\u0821"); + roundtrip(`"\u0123"`, "\u0123"); + roundtrip(`"\u0061\u30af\u30EA\u30b9"`, "\u0061\u30af\u30EA\u30b9"); + roundtrip(`"\uA66D"`, "\uA66D"); + roundtrip(`"\u005C"`, `\`); + roundtrip(`"\u0022"`, `"`); + roundtrip(`""`, ""); + roundtrip(` [] `, []); + + errassert(`[1,,]`, (1, 4)); + errassert(`[1 true]`, (1, 7)); + errassert(`["": 1]`, 
(1, 4)); + errassert(`[,1]`, (1, 2)); + errassert(`[1,,2]`, (1, 4)); + errassert(`["",]`, (1, 5)); + errassert(`["x"`, (1, 5)); + errassert(`[x`, (1, 2)); + errassert(`[3[4]]`, (1, 3)); + errassert(`[1:2]`, (1, 3)); + errassert(`[,]`, (1, 2)); + errassert(`[-]`, (1, 3)); + errassert(`[ , ""]`, (1, 5)); + errassert("[\"a\",\n4\n,1,", (3, 4)); + errassert(`[1,]`, (1, 4)); + errassert("[\"\va\"\\f", (1, 3)); + errassert(`[*]`, (1, 2)); + errassert(`[1,`, (1, 4)); + errassert("[1,\n1\n,1", (3, 3)); + errassert(`[{}`, (1, 4)); + errassert(`["x", truth]`, (1, 11)); + errassert(`{[: "x"}`, (1, 2)); + errassert(`{"x", null}`, (1, 5)); + errassert(`{"x"::"b"}`, (1, 6)); + errassert(`{"a":"a" 123}`, (1, 12)); + errassert(`{"a" b}`, (1, 6)); + errassert(`{:"b"}`, (1, 2)); + errassert(`{"a" "b"}`, (1, 8)); + errassert(`{"a":`, (1, 6)); + errassert(`{"a"`, (1, 5)); + errassert(`{1:1}`, (1, 2)); + errassert(`{9999E9999:1}`, (1, 10)); + errassert(`{null:null,null:null}`, (1, 5)); + errassert(`{"id":0,,,,,}`, (1, 9)); + errassert(`{'a':0}`, (1, 2)); + errassert(`{"id":0,}`, (1, 9)); + errassert(`{"a":"b",,"c":"d"}`, (1, 10)); + errassert(`{true: false}`, (1, 5)); + errassert(`{"a":"a`, (1, 8)); + errassert(`{ "foo" : "bar", "a" }`, (1, 22)); + errassert(` `, (1, 2)); + errassert(``, (1, 1)); + errassert(`["asd]`, (1, 7)); + errassert(`True`, (1, 4)); + errassert(`]`, (1, 1)); + errassert(`}`, (1, 1)); + errassert(`{"x": true,`, (1, 12)); + errassert(`[`, (1, 2)); + errassert(`{`, (1, 2)); + errassert(``, (1, 1)); + errassert("\0", (1, 1)); + errassert(`{"":`, (1, 5)); + errassert(`['`, (1, 2)); + errassert(`["`, (1, 3)); + errassert(`[,`, (1, 2)); + errassert(`[{`, (1, 3)); + errassert(`{[`, (1, 2)); + errassert(`{]`, (1, 2)); + errassert(`[}`, (1, 2)); + errassert(`{'`, (1, 2)); + errassert(`{"`, (1, 3)); + errassert(`{,`, (1, 2)); + errassert(`["\{["\{["\{["\{`, (1, 4)); + errassert(`*`, (1, 1)); + errassert(`\u000A""`, (1, 1)); + errassert("\f", (1, 1)); +}; + +@test fn 
nestlimit() void = {
+	// The input nests six containers deep (object, array, array, array,
+	// object, array): a limit of 6 must load, a limit of 5 must not.
+	const s = `{ "foo": [[[{"bar": ["baz"]}]]] }`;
+	const val = loadstr(s, 6: nestlimit)!;
+	finish(val);
+	assert(loadstr(s, 5: nestlimit) is limitreached);
+};
diff --git a/encoding/json/+test/test_value.ha b/encoding/json/+test/test_value.ha
new file mode 100644
index 0000000..eca7dcf
--- /dev/null
+++ b/encoding/json/+test/test_value.ha
@@ -0,0 +1,49 @@
+// License: MPL-2.0
+// (c) 2022 Drew DeVault
+
+// Exercises set, get and del on an object: stored members can be read back,
+// and a missing or deleted key yields void.
+@test fn object() void = {
+	let obj = newobject();
+	defer finish(obj);
+
+	set(&obj, "hello", "world");
+	set(&obj, "foo", "bar");
+	set(&obj, "the answer", 42.0);
+
+	// XXX: Match overhaul?
+	assert(*(get(&obj, "hello") as *value) as str == "world");
+	assert(*(get(&obj, "foo") as *value) as str == "bar");
+	assert(*(get(&obj, "the answer") as *value) as f64 == 42.0);
+	assert(get(&obj, "nonexistent") is void);
+
+	del(&obj, "hello");
+	assert(get(&obj, "hello") is void);
+};
+
+// Checks that iterating an object with three members produces exactly three
+// (key, value) pairs followed by void.
+@test fn iterator() void = {
+	let obj = newobject();
+	defer finish(obj);
+
+	set(&obj, "hello", "world");
+	set(&obj, "foo", "bar");
+	set(&obj, "the answer", 42.0);
+
+	let it = iter(&obj);
+	assert(next(&it) is (const str, const *value));
+	assert(next(&it) is (const str, const *value));
+	assert(next(&it) is (const str, const *value));
+	assert(next(&it) is void);
+};
+
+// Checks that two objects holding the same members compare equal even though
+// the members were inserted in a different order.
+@test fn equal() void = {
+	let a = newobject();
+	defer finish(a);
+	set(&a, "a", 42.0);
+	set(&a, "A", "hello");
+
+	let b = newobject();
+	defer finish(b);
+	set(&b, "A", "hello");
+	set(&b, "a", 42.0);
+
+	assert(equal(a, b));
+};
diff --git a/encoding/json/COPYING b/encoding/json/COPYING
new file mode 100644
index 0000000..c257317
--- /dev/null
+++ b/encoding/json/COPYING
@@ -0,0 +1,367 @@
+Mozilla Public License Version 2.0
+==================================
+
+1. Definitions
+--------------
+
+1.1. "Contributor"
+    means each individual or legal entity that creates, contributes to
+    the creation of, or owns Covered Software.
+
+1.2. 
"Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. 
"Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. 
+Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+
+You may add additional accurate notices of copyright ownership.
diff --git a/encoding/json/README b/encoding/json/README
new file mode 100644
index 0000000..fa917d5
--- /dev/null
+++ b/encoding/json/README
@@ -0,0 +1,15 @@
+This module provides an implementation of the JavaScript Object Notation (JSON)
+format, as defined by RFC 8259. Note that several other, incompatible
+specifications exist. This implementation does not include any extensions; only
+features which are strictly required by the spec are implemented.
+
+A lexer for JSON values is provided, which may be initialized with [[newlexer]]
+and provides tokens via [[lex]], and which uses a relatively small amount of
+memory and provides relatively few guarantees regarding the compliance of the
+input with the JSON grammar.
+
+Additionally, the [[value]] type is provided to store any JSON value, as
+well as helpers like [[newobject]], [[get]], and [[set]]. One can load a JSON
+value from an input stream into a heap-allocated [[value]] via [[load]], which
+enforces all of JSON's grammar constraints and returns a value which must be
+freed with [[finish]].
diff --git a/encoding/json/dump.ha b/encoding/json/dump.ha
new file mode 100644
index 0000000..7e7dd8d
--- /dev/null
+++ b/encoding/json/dump.ha
@@ -0,0 +1,81 @@
+// License: MPL-2.0
+// (c) 2022 Sebastian
+use fmt;
+use io;
+use strings;
+use memio;
+
+// Dumps a [[value]] into an [[io::handle]] as a string without any additional
+// formatting.
+// Returns the sum of the sizes reported by the individual [[fmt]] calls, or
+// the first I/O error encountered.
+export fn dump(out: io::handle, val: value) (size | io::error) = {
+	let total = 0z;
+	match (val) {
+	case let prim: (f64 | bool) =>
+		total += fmt::fprint(out, prim)?;
+	case let text: str =>
+		total += fmt::fprint(out, `"`)?;
+		let runes = strings::iter(text);
+		for (const r => strings::next(&runes)) {
+			// Runes that have a two-character JSON escape map to
+			// it; every other rune maps to the empty string.
+			const esc = switch (r) {
+			case '\b' =>
+				yield `\b`;
+			case '\f' =>
+				yield `\f`;
+			case '\n' =>
+				yield `\n`;
+			case '\r' =>
+				yield `\r`;
+			case '\t' =>
+				yield `\t`;
+			case '\"' =>
+				yield `\"`;
+			case '\\' =>
+				yield `\\`;
+			case =>
+				yield "";
+			};
+			if (len(esc) != 0) {
+				total += fmt::fprint(out, esc)?;
+			} else if (iscntrl(r)) {
+				// Any other control character is written as a
+				// \u escape using the {:.4x} format.
+				total += fmt::fprintf(out, `\u{:.4x}`,
+					r: u32)?;
+			} else {
+				total += fmt::fprint(out, r)?;
+			};
+		};
+		total += fmt::fprint(out, `"`)?;
+	case _null =>
+		total += fmt::fprint(out, "null")?;
+	case let items: []value =>
+		total += fmt::fprint(out, "[")?;
+		for (let i = 0z; i < len(items); i += 1) {
+			// The separator precedes every element but the first.
+			if (i != 0) {
+				total += fmt::fprint(out, ",")?;
+			};
+			total += dump(out, items[i])?;
+		};
+		total += fmt::fprint(out, "]")?;
+	case let obj: object =>
+		total += fmt::fprint(out, "{")?;
+		let first = true;
+		let members = iter(&obj);
+		for (true) {
+			const member = match (next(&members)) {
+			case void =>
+				break;
+			case let m: (const str, const *value) =>
+				yield m;
+			};
+			if (!first) {
+				total += fmt::fprint(out, ",")?;
+			};
+			first = false;
+			// The key goes through dump as a string value, so it
+			// is quoted and escaped like any other string.
+			total += dump(out, member.0)?;
+			total += fmt::fprint(out, ":")?;
+			total += dump(out, *member.1)?;
+		};
+		total += fmt::fprint(out, "}")?;
+	};
+	return total;
+};
+
+// Dumps a [[value]] into a string without any additional formatting. The caller
+// must free the return value.
+export fn dumpstr(val: value) str = {
+	// The value is dumped into a growable in-memory stream. The stream is
+	// not closed here; presumably its buffer backs the returned string,
+	// which is why the caller is asked to free it.
+	let s = memio::dynamic();
+	dump(&s, val)!;
+	return memio::string(&s)!;
+};
diff --git a/encoding/json/lex.ha b/encoding/json/lex.ha
new file mode 100644
index 0000000..7b9bf12
--- /dev/null
+++ b/encoding/json/lex.ha
@@ -0,0 +1,417 @@
+// License: MPL-2.0
+// (c) 2022 Drew DeVault
+use ascii;
+use bufio;
+use encoding::utf8;
+use io;
+use os;
+use strconv;
+use strings;
+use memio;
+
+// State of a JSON lexer. Create one with [[newlexer]] and release it with
+// [[close]].
+export type lexer = struct {
+	// Handle the input is read from.
+	src: io::handle,
+	// Scratch buffer in which words, numbers and strings are assembled;
+	// string tokens returned by [[lex]] are borrowed from it.
+	strbuf: memio::stream,
+	// Token pushed back by [[unlex]], or void if there is none.
+	un: (token | void),
+	// Rune pushed back by unget, or void if there is none.
+	rb: (rune | void),
+	// Current location. The first field is incremented for every '\n'
+	// read and the second is reset to 0 at that point, otherwise the
+	// second field is incremented per rune, i.e. (line, column).
+	loc: (uint, uint),
+	// Value of loc when the most recent [[lex]] call started; [[unlex]]
+	// restores loc from it.
+	prevloc: (uint, uint),
+	// Value of loc saved by [[unlex]]; restored when the unlexed token is
+	// handed out again.
+	nextloc: (uint, uint),
+	// Value of loc before the most recently read rune; unget restores
+	// loc from it.
+	prevrloc: (uint, uint),
+};
+
+// Creates a new JSON lexer. The caller may obtain tokens with [[lex]] and
+// should pass the result to [[close]] when they're done with it.
+export fn newlexer(src: io::handle) lexer = lexer {
+	src = src,
+	strbuf = memio::dynamic(),
+	un = void,
+	rb = void,
+	// Line 1, no runes consumed yet.
+	loc = (1, 0),
+	...
+};
+
+// Frees state associated with a JSON lexer.
+export fn close(lex: *lexer) void = {
+	io::close(&lex.strbuf)!;
+};
+
+// Returns the next token from a JSON lexer. The return value is borrowed from
+// the lexer and will be overwritten on subsequent calls.
+export fn lex(lex: *lexer) (token | io::EOF | error) = {
+	// A token pushed back by [[unlex]] is returned first, and the location
+	// saved by [[unlex]] is restored along with it.
+	match (lex.un) {
+	case void =>
+		lex.prevloc = lex.loc;
+	case let tok: token =>
+		lex.un = void;
+		lex.prevloc = lex.loc;
+		lex.loc = lex.nextloc;
+		return tok;
+	};
+
+	const rn = match (nextrunews(lex)?) 
{
+	case io::EOF =>
+		return io::EOF;
+	case let rn: rune =>
+		yield rn;
+	};
+
+	// Single-rune tokens and strings are recognised directly; any other
+	// rune falls through to the number and word handling below.
+	switch (rn) {
+	case '[' =>
+		return arraystart;
+	case ']' =>
+		return arrayend;
+	case '{' =>
+		return objstart;
+	case '}' =>
+		return objend;
+	case ',' =>
+		return comma;
+	case ':' =>
+		return colon;
+	case '"' =>
+		return scan_str(lex)?;
+	case =>
+		yield;
+	};
+
+	// The rune is pushed back so that the scanner sees the whole token.
+	if (ascii::isdigit(rn) || rn == '-') {
+		unget(lex, rn);
+		return scan_number(lex)?;
+	};
+
+	if (!ascii::isalpha(rn)) {
+		return lex.loc: invalid;
+	};
+
+	// The only words accepted are the literals true, false and null.
+	unget(lex, rn);
+	const word = scan_word(lex)?;
+	switch (word) {
+	case "true" =>
+		return true;
+	case "false" =>
+		return false;
+	case "null" =>
+		return _null;
+	case =>
+		return lex.loc: invalid;
+	};
+};
+
+// "Unlexes" a token from the lexer, such that the next call to [[lex]] will
+// return that token again. Only one token can be unlexed at a time, otherwise
+// the program will abort.
+export fn unlex(lex: *lexer, tok: token) void = {
+	assert(lex.un is void, "encoding::json::unlex called twice in a row");
+	lex.un = tok;
+	// Remember where the lexer stopped, then rewind the reported location
+	// to where it was before this token was lexed.
+	lex.nextloc = lex.loc;
+	lex.loc = lex.prevloc;
+};
+
+// Scans until encountering a non-alphabetical character, returning the
+// resulting word. The terminating rune is pushed back. The returned string is
+// the content of lex.strbuf, which the next scan resets.
+fn scan_word(lex: *lexer) (str | error) = {
+	memio::reset(&lex.strbuf);
+
+	for (true) {
+		const rn = match (nextrune(lex)?) {
+		case let rn: rune =>
+			yield rn;
+		case io::EOF =>
+			break;
+		};
+		if (!ascii::isalpha(rn)) {
+			unget(lex, rn);
+			break;
+		};
+		memio::appendrune(&lex.strbuf, rn)!;
+	};
+
+	return memio::string(&lex.strbuf)!;
+};
+
+// States of the number scanner in scan_number.
+type numstate = enum {
+	// Initial state: an optional leading '-' may be consumed.
+	SIGN,
+	// The first digit of the integer part is required.
+	START,
+	// The integer part is a single '0': only '.', 'e'/'E' or the end of
+	// the number may follow; another digit is an error.
+	ZERO,
+	// Inside the digits of the integer part.
+	INTEGER,
+	// Just after '.': at least one fraction digit is required.
+	FRACSTART,
+	// Inside the fraction digits.
+	FRACTION,
+	// Just after 'e'/'E': an optional '+' or '-' may be consumed.
+	EXPSIGN,
+	// The first exponent digit is required.
+	EXPSTART,
+	// Inside the exponent digits.
+	EXPONENT,
+};
+
+// Scans a JSON number. Runes are validated by a state machine over
+// [[numstate]] while being collected in lex.strbuf; the collected text is then
+// converted with strconv::stof64. Returns the number as an f64 token, or
+// [[invalid]] if the text is malformed or cannot be converted.
+fn scan_number(lex: *lexer) (token | error) = {
+	memio::reset(&lex.strbuf);
+
+	let state = numstate::SIGN;
+	for (true) {
+		const rn = match (nextrune(lex)?) 
{
+		case let rn: rune =>
+			yield rn;
+		case io::EOF =>
+			// End of input ends the number; whatever was collected
+			// so far is handed to the conversion below.
+			break;
+		};
+
+		// Each case either accepts rn (falling through to the append at
+		// the bottom of the loop), pushes it back and re-examines it in
+		// the next state (continue), pushes it back and ends the number
+		// (break), or rejects it as invalid.
+		switch (state) {
+		case numstate::SIGN =>
+			state = numstate::START;
+			if (rn != '-') {
+				unget(lex, rn);
+				continue;
+			};
+		case numstate::START =>
+			switch (rn) {
+			case '0' =>
+				state = numstate::ZERO;
+			case =>
+				if (!ascii::isdigit(rn)) {
+					return lex.loc: invalid;
+				};
+				state = numstate::INTEGER;
+			};
+		case numstate::ZERO =>
+			switch (rn) {
+			case '.' =>
+				state = numstate::FRACSTART;
+			case 'e', 'E' =>
+				state = numstate::EXPSIGN;
+			case =>
+				// A digit directly after a leading zero is
+				// rejected.
+				if (ascii::isdigit(rn)) {
+					return lex.loc: invalid;
+				};
+				unget(lex, rn);
+				break;
+			};
+		case numstate::INTEGER =>
+			switch (rn) {
+			case '.' =>
+				state = numstate::FRACSTART;
+			case 'e', 'E' =>
+				state = numstate::EXPSIGN;
+			case =>
+				if (!ascii::isdigit(rn)) {
+					unget(lex, rn);
+					break;
+				};
+			};
+		case numstate::FRACSTART =>
+			if (!ascii::isdigit(rn)) {
+				return lex.loc: invalid;
+			};
+			state = numstate::FRACTION;
+		case numstate::FRACTION =>
+			switch (rn) {
+			case 'e', 'E' =>
+				state = numstate::EXPSIGN;
+			case =>
+				if (!ascii::isdigit(rn)) {
+					unget(lex, rn);
+					break;
+				};
+			};
+		case numstate::EXPSIGN =>
+			state = numstate::EXPSTART;
+			if (rn != '+' && rn != '-') {
+				unget(lex, rn);
+				continue;
+			};
+		case numstate::EXPSTART =>
+			if (!ascii::isdigit(rn)) {
+				return lex.loc: invalid;
+			};
+			state = numstate::EXPONENT;
+		case numstate::EXPONENT =>
+			if (!ascii::isdigit(rn)) {
+				unget(lex, rn);
+				break;
+			};
+		};
+
+		memio::appendrune(&lex.strbuf, rn)!;
+	};
+
+	// Text that ended in an incomplete state (for example "-" or "1.") is
+	// not rejected above; it is left to the conversion to refuse it.
+	match (strconv::stof64(memio::string(&lex.strbuf)!)) {
+	case let f: f64 =>
+		return f;
+	case =>
+		return lex.loc: invalid;
+	};
+};
+
+// Scans the remainder of a string token; the opening quote has already been
+// consumed by [[lex]]. Escape sequences are decoded via scan_escape and
+// unescaped control characters are rejected. The returned string is the
+// content of lex.strbuf.
+fn scan_str(lex: *lexer) (token | error) = {
+	memio::reset(&lex.strbuf);
+
+	for (true) {
+		const rn = match (nextrune(lex)?) 
{ + case let rn: rune => + yield rn; + case io::EOF => + lex.loc.1 += 1; + return lex.loc: invalid; + }; + + switch (rn) { + case '"' => + break; + case '\\' => + const rn = scan_escape(lex)?; + memio::appendrune(&lex.strbuf, rn)!; + case => + if (iscntrl(rn)) { + return lex.loc: invalid; + }; + memio::appendrune(&lex.strbuf, rn)!; + }; + }; + + return memio::string(&lex.strbuf)!; +}; + +fn scan_escape(lex: *lexer) (rune | error) = { + const rn = match (nextrune(lex)?) { + case let rn: rune => + yield rn; + case io::EOF => + return lex.loc: invalid; + }; + + switch (rn) { + case '\"' => + return '\"'; + case '\\' => + return '\\'; + case '/' => + return '/'; + case 'b' => + return '\b'; + case 'f' => + return '\f'; + case 'n' => + return '\n'; + case 'r' => + return '\r'; + case 't' => + return '\t'; + case 'u' => + const u = scan_escape_codepoint(lex)?; + + if (u >= 0xd800 && u <= 0xdfff) { + if (u >= 0xdc00) { + return lex.loc: invalid; + }; + + const rn = match (nextrune(lex)?) { + case let rn: rune => + yield rn; + case io::EOF => + return lex.loc: invalid; + }; + if (rn != '\\') { + return lex.loc: invalid; + }; + const rn = match (nextrune(lex)?) { + case let rn: rune => + yield rn; + case io::EOF => + return lex.loc: invalid; + }; + if (rn != 'u') { + return lex.loc: invalid; + }; + + const v = scan_escape_codepoint(lex)?; + if (v < 0xdc00 || v > 0xdfff) { + return lex.loc: invalid; + }; + + const hi = u: u32 & 0x03ff; + const lo = v: u32 & 0x03ff; + return ((hi >> 10 | lo) + 0x10000): rune; + }; + + return u: u32: rune; + case => + return lex.loc: invalid; + }; +}; + +fn scan_escape_codepoint(lex: *lexer) (u16 | error) = { + let buf: [4]u8 = [0...]; + match (io::readall(lex.src, buf)?) 
{ + case io::EOF => + return lex.loc: invalid; + case size => + yield; + }; + const s = match (strings::fromutf8(buf)) { + case let s: str => + yield s; + case => + return lex.loc: invalid; + }; + match (strconv::stou16(s, strconv::base::HEX)) { + case let u: u16 => + lex.loc.1 += 4; + return u; + case => + return lex.loc: invalid; + }; +}; + +// Gets the next rune from the lexer. +fn nextrune(lex: *lexer) (rune | io::EOF | error) = { + if (lex.rb is rune) { + lex.prevrloc = lex.loc; + const r = lex.rb as rune; + lex.rb = void; + if (r == '\n') { + lex.loc = (lex.loc.0 + 1, 0); + } else { + lex.loc.1 += 1; + }; + return r; + }; + match (bufio::read_rune(lex.src)) { + case let err: io::error => + return err; + case utf8::invalid => + return lex.loc: invalid; + case io::EOF => + return io::EOF; + case let rn: rune => + lex.prevrloc = lex.loc; + if (rn == '\n') { + lex.loc = (lex.loc.0 + 1, 0); + } else { + lex.loc.1 += 1; + }; + return rn; + }; +}; + +// Like nextrune but skips whitespace. +fn nextrunews(lex: *lexer) (rune | io::EOF | error) = { + for (true) { + match (nextrune(lex)?) { + case let rn: rune => + if (isspace(rn)) { + continue; + }; + return rn; + case io::EOF => + return io::EOF; + }; + }; +}; + +fn unget(lex: *lexer, r: rune) void = { + assert(lex.rb is void); + lex.rb = r; + lex.loc = lex.prevrloc; +}; + +fn iscntrl(r: rune) bool = r: u32 < 0x20; + +fn isspace(r: rune) bool = ascii::isspace(r) && r != '\f'; diff --git a/encoding/json/load.ha b/encoding/json/load.ha new file mode 100644 index 0000000..8dc2b56 --- /dev/null +++ b/encoding/json/load.ha @@ -0,0 +1,148 @@ +use memio; +use io; +use strings; +use types; + +// Options for [[load]]. +export type load_option = nestlimit; + +// The maximum number of nested objects or arrays that can be entered before +// erroring out. +export type nestlimit = uint; + +// Parses a JSON value from the given [[io::handle]], returning the value or an +// error. 
The return value is allocated on the heap; use [[finish]] to free it
+// up when you're done using it.
+//
+// By default, this function assumes non-antagonistic inputs, and does not limit
+// recursion depth or memory usage. You may want to set a custom [[nestlimit]],
+// or incorporate an [[io::limitreader]] or similar. Alternatively, you can use
+// the JSON lexer ([[lex]]) directly if dealing with potentially malicious
+// inputs.
+export fn load(src: io::handle, opts: load_option...) (value | error) = {
+	let limit = types::UINT_MAX;
+	// If several limits are given, the last one wins.
+	for (let i = 0z; i < len(opts); i += 1) {
+		limit = opts[i]: nestlimit: uint;
+	};
+	const lex = newlexer(src);
+	defer close(&lex);
+	return _load(&lex, 0, limit);
+};
+
+// Parses a JSON value from the given string, returning the value or an error.
+// The return value is allocated on the heap; use [[finish]] to free it up when
+// you're done using it.
+//
+// See the documentation for [[load]] for information on dealing with
+// potentially malicious inputs.
+export fn loadstr(input: str, opts: load_option...) (value | error) = {
+	let src = memio::fixed(strings::toutf8(input));
+	return load(&src, opts...);
+};
+
+// Parses a single JSON value from the lexer. "level" is the number of arrays
+// and objects currently being parsed around this value; entering another one
+// when level equals "limit" returns [[limitreached]]. String tokens are
+// duplicated with [[strings::dup]] before being returned. A token which cannot
+// start a value yields [[invalid]] at the lexer's current location.
+fn _load(lexer: *lexer, level: uint, limit: uint) (value | error) = {
+	const tok = mustscan(lexer)?;
+	match (tok) {
+	case _null =>
+		return _null;
+	case let b: bool =>
+		return b;
+	case let f: f64 =>
+		return f;
+	case let s: str =>
+		return strings::dup(s);
+	case arraystart =>
+		if (level == limit) {
+			return limitreached;
+		};
+		return _load_array(lexer, level + 1, limit);
+	case objstart =>
+		if (level == limit) {
+			return limitreached;
+		};
+		return _load_obj(lexer, level + 1, limit);
+	case (arrayend | objend | colon | comma) =>
+		return lexer.loc: invalid;
+	};
+};
+
+// Parses the elements of an array whose '[' token has already been consumed.
+// If an error is returned, the elements collected so far are released by the
+// deferred [[finish]].
+fn _load_array(lexer: *lexer, level: uint, limit: uint) (value | error) = {
+	let success = false;
+	let array: []value = [];
+	defer if (!success) finish(array);
+	let tok = mustscan(lexer)?;
+	match (tok) {
+	case arrayend =>
+		// Empty array.
+		success = true;
+		return array;
+	case =>
+		// First token of the first element: give it back to the
+		// lexer so that _load sees it.
+		unlex(lexer, tok);
+	};
+
+	for (true) {
+		append(array, _load(lexer, level, limit)?);
+
+		// Each element is followed by ',' or the closing ']'.
+		tok = mustscan(lexer)?;
+		match (tok) {
+		case comma => void;
+		case arrayend => break;
+		case =>
+			return lexer.loc: invalid;
+		};
+	};
+	success = true;
+	return array;
+};
+
+// Parses the members of an object whose '{' token has already been consumed.
+// If an error is returned, the members collected so far are released by the
+// deferred [[finish]].
+fn _load_obj(lexer: *lexer, level: uint, limit: uint) (value | error) = {
+	let success = false;
+	let obj = newobject();
+	defer if (!success) finish(obj);
+	let tok = mustscan(lexer)?;
+	match (tok) {
+	case objend =>
+		// Empty object.
+		success = true;
+		return obj;
+	case =>
+		// First token of the first member: give it back to the lexer.
+		unlex(lexer, tok);
+	};
+
+	for (true) {
+		let tok = mustscan(lexer)?;
+		// The key is copied out of the token here and freed again at
+		// the end of this iteration by the defer below.
+		const key = match (tok) {
+		case let s: str =>
+			yield strings::dup(s);
+		case =>
+			return lexer.loc: invalid;
+		};
+		defer free(key);
+
+		tok = mustscan(lexer)?;
+		if (!(tok is colon)) {
+			return lexer.loc: invalid;
+		};
+
+		put(&obj, key, _load(lexer, level, limit)?);
+
+		// Each member is followed by ',' or the closing '}'.
+		tok = mustscan(lexer)?;
+		match (tok) {
+		case comma => void;
+		case objend => break;
+		case =>
+			return lexer.loc: invalid;
+		};
+	};
+
+	success = true;
+	return obj;
+};
+
+// Like [[lex]], but treats the end of the input as an error: it returns
+// [[invalid]] with the column advanced by one past the current location.
+fn mustscan(lexer: *lexer) (token | error) = {
+	match (lex(lexer)?) {
+	case io::EOF =>
+		lexer.loc.1 += 1;
+		return lexer.loc: invalid;
+	case let tok: token =>
+		return tok;
+	};
+};
diff --git a/encoding/json/path/path.ha b/encoding/json/path/path.ha
new file mode 100644
index 0000000..819e9f5
--- /dev/null
+++ b/encoding/json/path/path.ha
@@ -0,0 +1,26 @@
+// A compiled JSONPath query.
+export type query = []segment;
+
+// The kind of a [[segment]].
+// NOTE(review): presumably JSONPath's child and descendant segments; path.ha
+// only declares types, so confirm against the code that evaluates queries.
+export type segment_type = enum {
+	CHILD,
+	DESCENDANT,
+};
+
+// One step of a [[query]]: a segment kind together with a selector.
+export type segment = struct {
+	stype: segment_type,
+	selector: selector,
+};
+
+// The selector of a [[segment]].
+// NOTE(review): the str member is presumably a member name - confirm.
+export type selector = (str | wild | index | slice | filter);
+
+export type wild = void;
+
+export type index = int;
+
+// NOTE(review): void presumably marks a bound or step which was left out of
+// the slice expression - confirm.
+export type slice = struct {
+	start: (int | void),
+	end: (int | void),
+	step: (int | void),
+};
+
+export type filter = void; // TODO
diff --git a/encoding/json/types.ha b/encoding/json/types.ha
new file mode 100644
index 0000000..1e1b433
--- /dev/null
+++ b/encoding/json/types.ha
@@ -0,0 +1,50 @@
+// License: MPL-2.0
+// (c) 2022 Drew DeVault
+use fmt;
+use io;
+
+// An invalid JSON token was encountered at this location (line, column).
+export type invalid = !(uint, uint);
+
+// The maximum nesting limit was reached.
+export type limitreached = !void;
+
+// A tagged union of all possible errors returned from this module.
+export type error = !(invalid | limitreached | io::error);
+
+// The JSON null value.
+export type _null = void;
+
+// The '[' token, signaling the start of a JSON array.
+export type arraystart = void;
+
+// The ']' token, signaling the end of a JSON array.
+export type arrayend = void;
+
+// The '{' token, signaling the start of a JSON object.
+export type objstart = void;
+
+// The '}' token, signaling the end of a JSON object.
+export type objend = void;
+
+// The ':' token.
+export type colon = void;
+
+// The ',' token.
+export type comma = void;
+
+// All tokens which can be returned from the JSON tokenizer.
+export type token = (arraystart | arrayend | objstart |
+	objend | colon | comma | str | f64 | bool | _null);
+
+// Converts an [[error]] into a human-friendly string. The result may be stored
+// in a static buffer which is overwritten by the next call.
+export fn strerror(err: error) const str = {
+	// Large enough for two 10-digit uints plus the fixed message text.
+	static let buf: [53]u8 = [0...];
+	match (err) {
+	case let err: invalid =>
+		return fmt::bsprintf(buf,
+			"{}:{}: Invalid JSON token encountered", err.0, err.1);
+	case limitreached =>
+		return "Maximum nesting limit reached";
+	case let err: io::error =>
+		return io::strerror(err);
+	};
+};
diff --git a/encoding/json/value.ha b/encoding/json/value.ha
new file mode 100644
index 0000000..fe68688
--- /dev/null
+++ b/encoding/json/value.ha
@@ -0,0 +1,217 @@
+// License: MPL-2.0
+// (c) 2022 Drew DeVault
+use hash::fnv;
+use htab;
+use strings;
+
+// A JSON object: a hash table of key/value entries. Create one with
+// [[newobject]] and access it with [[get]], [[set]] and [[put]].
+export type object = struct {
+	table: htab::table,
+};
+
+// A JSON value.
+export type value = (f64 | str | bool | _null | []value | object);
+
+// A key/value pair as stored in an object's hash table.
+type entry = (str, value);
+
+// Equality callback handed to htab::get by _get: ctx points to the key being
+// looked up (a str) and ent to a stored entry.
+fn htab_eq(ctx: *opaque, ent: *opaque) bool =
+	*(ctx: *str) == (ent: *entry).0;
+
+// Looks up the entry stored under the given key, whose hash has already been
+// computed by the caller. Callers treat a null result as "key not present".
+fn _get(obj: *object, hash: u64, key: str) nullable *entry =
+	htab::get(&obj.table, hash, &htab_eq, &key, size(entry)):
+		nullable *entry;
+
+// Initializes a new (empty) JSON object. Call [[finish]] to free associated
+// resources when you're done using it.
+export fn newobject() object = {
+	return object {
+		table = htab::new(0, size(entry)),
+	};
+};
+
+// Gets a value from a JSON object. The return value is borrowed from the
+// object. Returns void if the key is not present.
+export fn get(obj: *object, key: str) (*value | void) = {
+	const hash = fnv::string(key);
+	match (_get(obj, hash, key)) {
+	case let ent: *entry =>
+		return &ent.1;
+	case null =>
+		return void;
+	};
+};
+
+// Sets a value in a JSON object. The key and value will be duplicated.
+export fn set(obj: *object, key: const str, val: const value) void = {
+	put(obj, key, dup(val));
+};
+
+// Sets a value in a JSON object. The key will be duplicated. The object will
+// assume ownership over the value, without duplicating it.
+export fn put(obj: *object, key: const str, val: const value) void = {
+	const h = fnv::string(key);
+	match (_get(obj, h, key)) {
+	case null =>
+		// New key: the table entry gets its own copy of the key.
+		const slot = htab::add(&obj.table, h, size(entry)): *entry;
+		*slot = (strings::dup(key), val);
+	case let found: *entry =>
+		// Existing key: release the old value before replacing it.
+		finish(found.1);
+		found.1 = val;
+	};
+};
+
+// Removes each of the given keys from a JSON object, freeing the values which
+// were stored under them. Keys which are not present are skipped.
+export fn del(obj: *object, keys: const str...) void = {
+	for (let n = 0z; n < len(keys); n += 1) {
+		match (take(obj, keys[n])) {
+		case void =>
+			void;
+		case let old: value =>
+			finish(old);
+		};
+	};
+};
+
+// Removes a key from a JSON object and hands the value stored under it to the
+// caller, who becomes responsible for freeing it. Returns void if the key is
+// not present.
+export fn take(obj: *object, key: const str) (value | void) = {
+	const h = fnv::string(key);
+	match (_get(obj, h, key)) {
+	case null =>
+		return void;
+	case let found: *entry =>
+		// Save the value, then drop the entry and the key it owns.
+		const old = found.1;
+		free(found.0);
+		htab::del(&obj.table, found, size(entry));
+		return old;
+	};
+};
+
+// Empties a JSON object, freeing every key and value stored in it.
+export fn reset(obj: *object) void = {
+	let cursor = iter(obj);
+	for (true) {
+		const pair = match (next(&cursor)) {
+		case let pair: (const str, const *value) =>
+			yield pair;
+		case void =>
+			break;
+		};
+		del(obj, pair.0);
+	};
+};
+
+// Reports how many key/value pairs a JSON object holds.
+export fn count(obj: *object) size = htab::count(&obj.table);
+
+// Iteration state over the key/value pairs of an [[object]]; see [[iter]] and
+// [[next]].
+export type iterator = struct {
+	iter: htab::iterator,
+};
+
+// Starts an iteration over the key/value pairs stored in an [[object]].
+export fn iter(obj: *object) iterator = iterator {
+	iter = htab::iter(&obj.table),
+};
+
+// Advances the iterator, returning the next key together with a pointer to its
+// value, or void once every pair has been visited.
+export fn next(iter: *iterator) ((const str, const *value) | void) = {
+	match (htab::next(&iter.iter, size(entry))) {
+	case null =>
+		return void;
+	case let raw: *opaque =>
+		const pair = raw: *entry;
+		return (pair.0, &pair.1);
+	};
+};
+
+// Duplicates a JSON value.
The caller must pass the return value to [[finish]]
+// to free associated resources when they're done using it.
+export fn dup(val: value) value = {
+	match (val) {
+	case let src: object =>
+		let copy = newobject();
+		let it = iter(&src);
+		for (true) match (next(&it)) {
+		case let kv: (const str, const *value) =>
+			// put copies the key; the value is deep-copied here.
+			put(&copy, kv.0, dup(*kv.1));
+		case void =>
+			break;
+		};
+		return copy;
+	case let src: []value =>
+		let copy: []value = alloc([], len(src));
+		for (let n = 0z; n < len(src); n += 1) {
+			append(copy, dup(src[n]));
+		};
+		return copy;
+	case let text: str =>
+		return strings::dup(text);
+	case =>
+		// Numbers, booleans and null are returned as they are.
+		return val;
+	};
+};
+
+// Reports whether two JSON values are equal. Arrays are compared element by
+// element, in order. Objects are equal when they hold the same number of pairs
+// and every key of the first maps to an equal value in the second.
+export fn equal(a: value, b: value) bool = {
+	match (a) {
+	case let lhs: object =>
+		if (!(b is object)) {
+			return false;
+		};
+		let rhs = b as object;
+		if (count(&lhs) != count(&rhs)) {
+			return false;
+		};
+		let it = iter(&lhs);
+		for (true) {
+			const kv = match (next(&it)) {
+			case let kv: (const str, const *value) =>
+				yield kv;
+			case void =>
+				break;
+			};
+			match (get(&rhs, kv.0)) {
+			case let other: *value =>
+				if (!equal(*kv.1, *other)) {
+					return false;
+				};
+			case void =>
+				return false;
+			};
+		};
+		return true;
+	case let lhs: []value =>
+		if (!(b is []value)) {
+			return false;
+		};
+		const rhs = b as []value;
+		if (len(lhs) != len(rhs)) {
+			return false;
+		};
+		for (let n = 0z; n < len(lhs); n += 1) {
+			if (!equal(lhs[n], rhs[n])) {
+				return false;
+			};
+		};
+		return true;
+	case let lhs: str =>
+		if (!(b is str)) {
+			return false;
+		};
+		return lhs == (b as str);
+	case let lhs: f64 =>
+		if (!(b is f64)) {
+			return false;
+		};
+		return lhs == (b as f64);
+	case let lhs: bool =>
+		if (!(b is bool)) {
+			return false;
+		};
+		return lhs == (b as bool);
+	case _null =>
+		return b is _null;
+	};
+};
+
+// Frees state associated with a JSON value.
+export fn finish(val: value) void = {
+	match (val) {
+	case let obj: object =>
+		// Free every key and value, then the table itself.
+		let it = iter(&obj);
+		for (true) {
+			const pair = match (next(&it)) {
+			case let pair: (const str, const *value) =>
+				yield pair;
+			case void =>
+				break;
+			};
+			free(pair.0);
+			finish(*pair.1);
+		};
+		htab::finish(&obj.table);
+	case let items: []value =>
+		// Free the elements first, then the slice holding them.
+		for (let n = 0z; n < len(items); n += 1) {
+			finish(items[n]);
+		};
+		free(items);
+	case let text: str =>
+		free(text);
+	case =>
+		// Numbers, booleans and null have nothing to free.
+		void;
+	};
+};
-- 
cgit v1.2.3