「‍」 Lingenic

hsv

(⤓.hpp ◇.hpp); γ ≜ [2026-01-27T082219.270, 2026-01-27T082219.270] ∧ |γ| = 1

/**
 * HSV - Hierarchical Separated Values
 *
 * A text-based file format and streaming protocol using ASCII control characters.
 * Unlimited nesting (like JSON). No escaping required. Binary data supported.
 *
 * Copyright 2026 Danslav Slavenskoj, Lingenic LLC
 * License: CC0 1.0 - Public Domain
 * https://creativecommons.org/publicdomain/zero/1.0/
 * You may use this code for any purpose without attribution.
 *
 * Spec: https://hsvfile.com
 * Repo: https://github.com/LingenicLLC/HSV
 */

#ifndef HSV_HPP
#define HSV_HPP

#include <cstddef>
#include <map>
#include <optional>
#include <string>
#include <utility>
#include <variant>
#include <vector>

namespace hsv {

// Control characters
//
// ASCII control codes used by HSV for framing, nesting and separation.
// They are `inline constexpr` so that every translation unit including this
// header refers to one shared definition of each constant instead of getting
// its own internal-linkage copy.
inline constexpr char SOH = '\x01';  // Start of Header
inline constexpr char STX = '\x02';  // Start of Text (data block)
inline constexpr char ETX = '\x03';  // End of Text
inline constexpr char EOT = '\x04';  // End of Transmission
inline constexpr char SO  = '\x0e';  // Shift Out (start nested)
inline constexpr char SI  = '\x0f';  // Shift In (end nested)
inline constexpr char DLE = '\x10';  // Data Link Escape (binary mode)
inline constexpr char FS  = '\x1c';  // File/Record Separator
inline constexpr char GS  = '\x1d';  // Group/Array Separator
inline constexpr char RS  = '\x1e';  // Record/Property Separator
inline constexpr char US  = '\x1f';  // Unit/Key-Value Separator

// Value is declared up front because Array and Object are containers of Value.
struct Value;

// Recursive container aliases
using Array = std::vector<Value>;             // sequence of values
using Object = std::map<std::string, Value>;  // key -> value, ordered by key

/**
 * One node of an HSV value tree.
 *
 * Holds exactly one of: a string, an Array, or an Object. A
 * default-constructed Value holds an empty string. The as_*() accessors
 * use std::get, so they throw std::bad_variant_access when the requested
 * alternative is not the one currently held; check with is_*() first.
 */
struct Value {
    std::variant<std::string, Array, Object> data;

    // --- construction -----------------------------------------------------
    Value() : data(std::in_place_type<std::string>) {}

    Value(const std::string& s) : data(std::in_place_type<std::string>, s) {}
    Value(std::string&& s) : data(std::in_place_type<std::string>, std::move(s)) {}

    Value(const Array& a) : data(std::in_place_type<Array>, a) {}
    Value(Array&& a) : data(std::in_place_type<Array>, std::move(a)) {}

    Value(const Object& o) : data(std::in_place_type<Object>, o) {}
    Value(Object&& o) : data(std::in_place_type<Object>, std::move(o)) {}

    // --- string -----------------------------------------------------------
    bool is_string() const { return std::get_if<std::string>(&data) != nullptr; }
    const std::string& as_string() const { return std::get<std::string>(data); }
    std::string& as_string() { return std::get<std::string>(data); }

    // --- array ------------------------------------------------------------
    bool is_array() const { return std::get_if<Array>(&data) != nullptr; }
    const Array& as_array() const { return std::get<Array>(data); }
    Array& as_array() { return std::get<Array>(data); }

    // --- object -----------------------------------------------------------
    bool is_object() const { return std::get_if<Object>(&data) != nullptr; }
    const Object& as_object() const { return std::get<Object>(data); }
    Object& as_object() { return std::get<Object>(data); }
};

struct Document {
    std::optional<Object> header;
    std::vector<Object> records;
};

namespace detail {

// Extract DLE+STX...DLE+ETX binary sections
//
// Scans `text` and replaces every terminated binary section with a
// placeholder of the form "\0BINARY<n>\0" (n counts up from 0).
//
// Returns {text with placeholders, map from placeholder to decoded payload}.
//
// Inside a section:
//   - DLE DLE decodes to a single DLE byte;
//   - DLE ETX ends the section;
//   - any other byte (including a DLE followed by something else) is copied
//     to the payload unchanged.
//
// If a DLE STX opener has no matching DLE ETX, the opening DLE is copied to
// the output literally and scanning resumes at the next character.
//
// NOTE(review): the placeholder is only distinguished by its NUL-delimited
// text; input that already contains the same byte sequence would be treated
// as a placeholder when the binaries are restored.
inline std::pair<std::string, std::map<std::string, std::string>>
extract_binary_sections(const std::string& text) {
    std::string result;
    std::map<std::string, std::string> binaries;
    std::size_t i = 0;
    int placeholder_count = 0;

    while (i < text.size()) {
        const bool opens_section =
            text[i] == DLE && i + 1 < text.size() && text[i + 1] == STX;

        if (opens_section) {
            std::size_t j = i + 2;
            std::string binary_data;
            bool terminated = false;

            while (j < text.size()) {
                if (text[j] == DLE && j + 1 < text.size()) {
                    if (text[j + 1] == ETX) {
                        terminated = true;  // j points at the closing DLE
                        break;
                    }
                    if (text[j + 1] == DLE) {
                        // Escaped DLE
                        binary_data += DLE;
                        j += 2;
                        continue;
                    }
                }
                binary_data += text[j];
                ++j;
            }

            if (terminated) {
                std::string placeholder("\0BINARY", 7);
                placeholder += std::to_string(placeholder_count);
                placeholder += '\0';

                result += placeholder;
                // Placeholders are unique, so this always inserts.
                binaries.emplace(std::move(placeholder), std::move(binary_data));
                ++placeholder_count;
                i = j + 2;  // skip the closing DLE ETX
                continue;
            }
            // Unterminated binary section: fall through and emit the DLE as-is.
        }

        result += text[i];
        ++i;
    }

    // Move instead of copy: the payloads can be arbitrarily large.
    return {std::move(result), std::move(binaries)};
}

// Restore binary placeholders
//
// Returns a copy of `value` in which each occurrence of a key of `binaries`
// is replaced by the mapped data.
//
// Substitution is done in a single pass over the ORIGINAL `value`: replaced
// data is never rescanned. This matters because binary payloads are
// arbitrary bytes and may themselves contain a placeholder sequence;
// rescanning would corrupt such payloads or, when a payload contains its own
// placeholder, never terminate.
//
// Empty keys are ignored. If two matches overlap, the one that starts first
// wins; for matches starting at the same position the key that sorts first
// in the map wins.
inline std::string restore_binaries(const std::string& value,
    const std::map<std::string, std::string>& binaries) {
    if (value.empty() || binaries.empty()) {
        return value;
    }

    using Entry = std::map<std::string, std::string>::const_iterator;

    // Start offset in `value` -> entry whose placeholder begins there.
    // emplace() keeps the first entry recorded for a given offset.
    std::map<std::size_t, Entry> hits;
    for (Entry it = binaries.begin(); it != binaries.end(); ++it) {
        const std::string& placeholder = it->first;
        if (placeholder.empty()) {
            continue;  // an empty key would match everywhere
        }
        for (std::size_t pos = value.find(placeholder);
             pos != std::string::npos;
             pos = value.find(placeholder, pos + placeholder.size())) {
            hits.emplace(pos, it);
        }
    }

    if (hits.empty()) {
        return value;
    }

    std::string result;
    result.reserve(value.size());
    std::size_t cursor = 0;  // first byte of `value` not yet copied
    for (const auto& hit : hits) {
        const std::size_t pos = hit.first;
        if (pos < cursor) {
            continue;  // overlaps a placeholder that was already replaced
        }
        result.append(value, cursor, pos - cursor);
        result += hit.second->second;
        cursor = pos + hit.second->first.size();
    }
    result.append(value, cursor, std::string::npos);
    return result;
}

// Split string by separator, respecting SO/SI nesting
//
// Splits `text` at every `sep` that occurs outside of any SO ... SI group.
// SO and SI themselves are kept in the output parts.
//
// Returns the parts in order. An empty `text` yields an empty vector; any
// other input yields at least one part (a trailing separator produces a
// trailing empty part).
//
// A stray SI with no matching SO is ignored for depth tracking: the depth
// never drops below zero, so one unbalanced SI cannot disable splitting for
// the rest of the text or expose separators inside a later SO ... SI group.
inline std::vector<std::string> split_respecting_nesting(const std::string& text, char sep) {
    std::vector<std::string> parts;
    std::string current;
    int depth = 0;

    for (char c : text) {
        if (c == SO) {
            ++depth;
            current += c;
        } else if (c == SI) {
            if (depth > 0) {
                --depth;
            }
            current += c;
        } else if (c == sep && depth == 0) {
            parts.push_back(std::move(current));
            current.clear();  // moved-from string: reset to a known empty state
        } else {
            current += c;
        }
    }

    // Emit the tail unless the input was empty.
    if (!current.empty() || !parts.empty()) {
        parts.push_back(std::move(current));
    }

    return parts;
}

// Forward declarations (parse_value and parse_object call each other)
inline Value parse_value(const std::string& value,
    const std::map<std::string, std::string>& binaries);
inline Object parse_object(const std::string& content,
    const std::map<std::string, std::string>& binaries);

// Parse one property value into a Value.
//
// `value` is text in which binary sections are still represented by their
// placeholders; `binaries` maps each placeholder to its payload.
//
// Classification is done on the placeholder text, in this order:
//   1. GS at nesting depth 0      -> Array, each element parsed recursively
//   2. wrapped in SO ... SI       -> Object parsed from the inner text
//   3. anything else              -> string, with binary payloads restored
//
// Structure is detected BEFORE binaries are restored so that payload bytes
// that happen to equal SO/SI/GS are never interpreted as structure.
// Checking for a top-level GS first lets an array of nested objects
// (SO..SI GS SO..SI) parse as an array rather than as one object, and
// requiring more than one part guarantees every recursive call receives a
// strictly shorter string.
inline Value parse_value(const std::string& value,
    const std::map<std::string, std::string>& binaries) {
    // Check for array (cheap find first; split only when a GS exists at all)
    if (value.find(GS) != std::string::npos) {
        auto parts = split_respecting_nesting(value, GS);
        if (parts.size() > 1) {
            Array arr;
            arr.reserve(parts.size());
            for (const auto& p : parts) {
                arr.push_back(parse_value(p, binaries));
            }
            return Value(std::move(arr));
        }
        // GS only occurs inside SO ... SI: not an array at this level.
    }

    // Check for nested structure (SO at start, SI at end)
    if (value.size() >= 2 && value.front() == SO && value.back() == SI) {
        return Value(parse_object(value.substr(1, value.size() - 2), binaries));
    }

    // Leaf: plain string, now safe to substitute the binary payloads.
    return Value(restore_binaries(value, binaries));
}

// Parse a sequence of RS-separated `key US value` properties into an Object.
//
// Properties without a US separator are skipped. If a property contains
// more than one top-level US, everything after the first one (with the US
// characters put back) is treated as the value. A key that appears more
// than once keeps the value of its last occurrence.
inline Object parse_object(const std::string& content,
    const std::map<std::string, std::string>& binaries) {
    Object result;

    for (const std::string& property : split_respecting_nesting(content, RS)) {
        const std::vector<std::string> pieces = split_respecting_nesting(property, US);
        if (pieces.size() < 2) {
            continue;  // no key/value separator in this property
        }

        // pieces[0] is the key; re-join the remaining pieces into the raw value.
        std::string raw_value = pieces[1];
        for (std::size_t k = 2; k < pieces.size(); ++k) {
            raw_value += US;
            raw_value += pieces[k];
        }

        result[pieces[0]] = parse_value(raw_value, binaries);
    }

    return result;
}

} // namespace detail

// Main parse function
//
// Parses HSV `text` into a Document:
//   - binary sections are first swapped out for placeholders;
//   - SOH ... STX is parsed as the header object (a later header replaces an
//     earlier one), and the data block that follows is parsed as records;
//   - STX ... ETX is a data block whose FS-separated records are each parsed
//     as an object; records that parse to an empty object are dropped;
//   - every other character outside those sections is skipped.
//
// An SOH with no later STX, or an STX with no later ETX, is skipped and
// scanning continues with the next character.
inline Document parse(const std::string& text) {
    const auto extracted = detail::extract_binary_sections(text);
    const std::string& body = extracted.first;
    const std::map<std::string, std::string>& binaries = extracted.second;

    Document doc;

    // Parses the data block opened by the STX at `stx_pos` and returns the
    // index at which scanning should resume.
    const auto consume_data_block = [&](std::size_t stx_pos) -> std::size_t {
        const std::size_t etx_pos = body.find(ETX, stx_pos + 1);
        if (etx_pos == std::string::npos) {
            return stx_pos + 1;  // unterminated block: step past the STX
        }

        const std::string block = body.substr(stx_pos + 1, etx_pos - stx_pos - 1);
        for (const auto& record : detail::split_respecting_nesting(block, FS)) {
            Object fields = detail::parse_object(record, binaries);
            if (!fields.empty()) {
                doc.records.push_back(std::move(fields));
            }
        }
        return etx_pos + 1;
    };

    std::size_t pos = 0;
    while (pos < body.size()) {
        switch (body[pos]) {
        case SOH: {
            const std::size_t stx_pos = body.find(STX, pos + 1);
            if (stx_pos == std::string::npos) {
                ++pos;  // header never closed: ignore this SOH
                break;
            }
            doc.header = detail::parse_object(
                body.substr(pos + 1, stx_pos - pos - 1), binaries);
            pos = consume_data_block(stx_pos);
            break;
        }
        case STX:
            pos = consume_data_block(pos);
            break;
        default:
            ++pos;
            break;
        }
    }

    return doc;
}

// Helper: Get string value from object
inline std::string get_string(const Object& obj, const std::string& key) {
    auto it = obj.find(key);
    if (it != obj.end() && it->second.is_string()) {
        return it->second.as_string();
    }
    return "";
}

// Helper: Check if object has key
inline bool has_key(const Object& obj, const std::string& key) {
    return obj.find(key) != obj.end();
}

} // namespace hsv

#endif // HSV_HPP