「‍」 Lingenic

lib

(⤓.rs ◇.rs); γ ≜ [2026-01-27T072639.320, 2026-01-27T072639.320] ∧ |γ| = 1

// Copyright 2026 Danslav Slavenskoj, Lingenic LLC
// License: CC0 1.0 - Public Domain
// https://creativecommons.org/publicdomain/zero/1.0/
// You may use this code for any purpose without attribution.
//
// Spec: https://hsvfile.com
// Repo: https://github.com/LingenicLLC/HSV

//! # HSV - Hierarchical Separated Values
//!
//! A text-based file format and streaming protocol using ASCII control characters.
//! Unlimited nesting (like JSON). No escaping required. Binary data supported.
//!
//! ## Features
//!
//! - **SOH headers** - Metadata before data blocks
//! - **STX/ETX framing** - Clear block boundaries
//! - **SO/SI nesting** - Unlimited depth structures
//! - **DLE binary mode** - Embed any bytes
//! - **FS/GS/RS/US structure** - Records, arrays, properties, key-value pairs
//!
//! ## Example
//!
//! ```rust
//! use hsv::{parse, HsvValue};
//!
//! // Simple key-value record
//! let data = "\x02name\x1fAlice\x1eage\x1f30\x03";
//! let doc = hsv::parse(data);
//! assert_eq!(doc.records.len(), 1);
//! ```
//!
//! ## Control Characters
//!
//! | Code | Hex  | Name | Purpose |
//! |------|------|------|---------|
//! | SOH  | 0x01 | Start of Header | Begin header section |
//! | STX  | 0x02 | Start of Text | Begin data block |
//! | ETX  | 0x03 | End of Text | End data block |
//! | SO   | 0x0E | Shift Out | Start nested structure |
//! | SI   | 0x0F | Shift In | End nested structure |
//! | DLE  | 0x10 | Data Link Escape | Binary mode escape |
//! | FS   | 0x1C | File Separator | Record separator |
//! | GS   | 0x1D | Group Separator | Array element separator |
//! | RS   | 0x1E | Record Separator | Property separator |
//! | US   | 0x1F | Unit Separator | Key-value separator |

use rayon::prelude::*;
use std::collections::HashMap;

/// Control characters used in HSV format
pub mod chars {
    /// Start of Header (0x01)
    pub const SOH: char = '\x01';
    /// Start of Text - begins data block (0x02)
    pub const STX: char = '\x02';
    /// End of Text - ends data block (0x03)
    pub const ETX: char = '\x03';
    /// End of Transmission (0x04)
    pub const EOT: char = '\x04';
    /// Enquiry - request acknowledgment (0x05)
    pub const ENQ: char = '\x05';
    /// Acknowledge - success (0x06)
    pub const ACK: char = '\x06';
    /// Shift Out - start nested structure (0x0E)
    pub const SO: char = '\x0e';
    /// Shift In - end nested structure (0x0F)
    pub const SI: char = '\x0f';
    /// Data Link Escape - binary mode (0x10)
    pub const DLE: char = '\x10';
    /// XON - resume transmission (0x11)
    pub const XON: char = '\x11';
    /// XOFF - pause transmission (0x13)
    pub const XOFF: char = '\x13';
    /// Negative Acknowledge - error (0x15)
    pub const NAK: char = '\x15';
    /// Synchronous Idle - keepalive (0x16)
    pub const SYN: char = '\x16';
    /// Cancel operation (0x18)
    pub const CAN: char = '\x18';
    /// File Separator - record separator (0x1C)
    pub const FS: char = '\x1c';
    /// Group Separator - array element separator (0x1D)
    pub const GS: char = '\x1d';
    /// Record Separator - property separator (0x1E)
    pub const RS: char = '\x1e';
    /// Unit Separator - key-value separator (0x1F)
    pub const US: char = '\x1f';
}

use chars::*;

/// Represents an HSV value - can be a string, array, or nested object
#[derive(Debug, Clone, PartialEq)]
pub enum HsvValue {
    /// A string value
    String(String),
    /// An array of values (separated by GS)
    Array(Vec<HsvValue>),
    /// A nested object (wrapped in SO/SI)
    Object(HashMap<String, HsvValue>),
}

impl HsvValue {
    /// Returns the value as a string, if it is one
    pub fn as_str(&self) -> Option<&str> {
        match self {
            HsvValue::String(s) => Some(s),
            _ => None,
        }
    }

    /// Returns the value as an array, if it is one
    pub fn as_array(&self) -> Option<&Vec<HsvValue>> {
        match self {
            HsvValue::Array(a) => Some(a),
            _ => None,
        }
    }

    /// Returns the value as an object, if it is one
    pub fn as_object(&self) -> Option<&HashMap<String, HsvValue>> {
        match self {
            HsvValue::Object(o) => Some(o),
            _ => None,
        }
    }

    /// Returns true if this is a string value
    pub fn is_string(&self) -> bool {
        matches!(self, HsvValue::String(_))
    }

    /// Returns true if this is an array value
    pub fn is_array(&self) -> bool {
        matches!(self, HsvValue::Array(_))
    }

    /// Returns true if this is an object value
    pub fn is_object(&self) -> bool {
        matches!(self, HsvValue::Object(_))
    }
}

/// Result of parsing HSV text
#[derive(Debug, Clone)]
pub struct HsvDocument {
    /// Optional header (content between SOH and STX)
    pub header: Option<HashMap<String, HsvValue>>,
    /// List of records from data blocks
    pub records: Vec<HashMap<String, HsvValue>>,
}

impl HsvDocument {
    /// Create a new empty document
    pub fn new() -> Self {
        HsvDocument {
            header: None,
            records: Vec::new(),
        }
    }

    /// Returns true if the document has a header
    pub fn has_header(&self) -> bool {
        self.header.is_some()
    }

    /// Returns true if there are no records
    pub fn is_empty(&self) -> bool {
        self.records.is_empty()
    }

    /// Returns the number of records
    pub fn len(&self) -> usize {
        self.records.len()
    }
}

impl Default for HsvDocument {
    fn default() -> Self {
        Self::new()
    }
}

/// Extract DLE+STX...DLE+ETX binary sections and replace with placeholders
fn extract_binary_sections(text: &str) -> (String, HashMap<String, String>) {
    let mut result = String::new();
    let mut binaries = HashMap::new();
    let chars: Vec<char> = text.chars().collect();
    let mut i = 0;
    let mut placeholder_count = 0;

    while i < chars.len() {
        // Check for DLE+STX (binary start)
        if chars[i] == DLE && i + 1 < chars.len() && chars[i + 1] == STX {
            let mut j = i + 2;
            let mut binary_data = String::new();

            while j < chars.len() {
                if chars[j] == DLE && j + 1 < chars.len() {
                    if chars[j + 1] == ETX {
                        // End of binary section
                        let placeholder = format!("\0BINARY{}\0", placeholder_count);
                        // Unescape DLE+DLE -> DLE
                        let unescaped = unescape_binary(&binary_data);
                        binaries.insert(placeholder.clone(), unescaped);
                        result.push_str(&placeholder);
                        placeholder_count += 1;
                        i = j + 2;
                        break;
                    } else if chars[j + 1] == DLE {
                        // Escaped DLE
                        binary_data.push(DLE);
                        j += 2;
                        continue;
                    }
                }
                binary_data.push(chars[j]);
                j += 1;
            }

            if j >= chars.len() {
                // No closing DLE+ETX found
                result.push(chars[i]);
                i += 1;
            }
        } else {
            result.push(chars[i]);
            i += 1;
        }
    }

    (result, binaries)
}

/// Handle DLE escaping: DLE+DLE -> DLE
fn unescape_binary(data: &str) -> String {
    let chars: Vec<char> = data.chars().collect();
    let mut result = String::new();
    let mut i = 0;

    while i < chars.len() {
        if chars[i] == DLE && i + 1 < chars.len() && chars[i + 1] == DLE {
            result.push(DLE);
            i += 2;
        } else {
            result.push(chars[i]);
            i += 1;
        }
    }

    result
}

/// Replace binary placeholders with actual binary data
fn restore_binaries(value: &str, binaries: &HashMap<String, String>) -> String {
    let mut result = value.to_string();
    for (placeholder, data) in binaries {
        result = result.replace(placeholder, data);
    }
    result
}

/// Split by separator, but respect SO/SI nesting depth
fn split_respecting_nesting(text: &str, sep: char) -> Vec<String> {
    let mut parts = Vec::new();
    let mut current = String::new();
    let mut depth = 0;

    for c in text.chars() {
        match c {
            c if c == SO => {
                depth += 1;
                current.push(c);
            }
            c if c == SI => {
                depth -= 1;
                current.push(c);
            }
            c if c == sep && depth == 0 => {
                parts.push(current);
                current = String::new();
            }
            _ => {
                current.push(c);
            }
        }
    }

    if !current.is_empty() || !parts.is_empty() {
        parts.push(current);
    }

    parts
}

/// Parse a value, handling arrays (GS) and nested structures (SO/SI)
fn parse_value(value: &str, binaries: &HashMap<String, String>) -> HsvValue {
    let value = restore_binaries(value, binaries);

    // Check for nested structure (SO at start, SI at end)
    if value.starts_with(SO) && value.ends_with(SI) {
        let inner = &value[SO.len_utf8()..value.len() - SI.len_utf8()];
        return HsvValue::Object(parse_object(inner, binaries));
    }

    // Check for array (respect nesting)
    if value.contains(GS) {
        let parts = split_respecting_nesting(&value, GS);
        let array: Vec<HsvValue> = parts.iter().map(|p| parse_value(p, binaries)).collect();
        return HsvValue::Array(array);
    }

    HsvValue::String(value)
}

/// Parse an object from RS-separated properties
fn parse_object(content: &str, binaries: &HashMap<String, String>) -> HashMap<String, HsvValue> {
    let mut obj = HashMap::new();

    // Split by RS, respecting SO/SI nesting
    let props = split_respecting_nesting(content, RS);

    for prop in props {
        // Split by US, respecting SO/SI nesting
        let parts = split_respecting_nesting(&prop, US);
        if parts.len() >= 2 {
            let k = parts[0].clone();
            let v = parts[1..].join(&US.to_string());
            obj.insert(k, parse_value(&v, binaries));
        }
    }

    obj
}

/// Parse HSV text into structured data.
///
/// This function processes HSV-encoded text and returns an `HsvDocument`
/// containing any header and all records found in the text.
///
/// # Arguments
///
/// * `text` - The HSV-encoded text to parse
///
/// # Returns
///
/// An `HsvDocument` containing the parsed header (if present) and records.
///
/// # Example
///
/// ```rust
/// use hsv::parse;
///
/// let text = "\x02name\x1fAlice\x1eage\x1f30\x03";
/// let doc = parse(text);
/// assert_eq!(doc.records.len(), 1);
/// ```
pub fn parse(text: &str) -> HsvDocument {
    // First, extract binary sections
    let (text, binaries) = extract_binary_sections(text);
    let chars: Vec<char> = text.chars().collect();

    let mut header: Option<HashMap<String, HsvValue>> = None;
    let mut blocks: Vec<(usize, usize)> = Vec::new(); // (start, end) indices of data blocks

    let mut i = 0;
    while i < chars.len() {
        // Check for SOH (header start)
        if chars[i] == SOH {
            // Find STX (header end, data start)
            if let Some(stx_offset) = chars[i + 1..].iter().position(|&c| c == STX) {
                let stx_pos = i + 1 + stx_offset;

                // Parse header
                let header_content: String = chars[i + 1..stx_pos].iter().collect();
                header = Some(parse_object(&header_content, &binaries));

                // Find ETX (data end)
                if let Some(etx_offset) = chars[stx_pos + 1..].iter().position(|&c| c == ETX) {
                    let etx_pos = stx_pos + 1 + etx_offset;
                    blocks.push((stx_pos + 1, etx_pos));
                    i = etx_pos + 1;
                    continue;
                }
            }
            i += 1;
        }
        // Check for STX (data block without header)
        else if chars[i] == STX {
            if let Some(etx_offset) = chars[i + 1..].iter().position(|&c| c == ETX) {
                let etx_pos = i + 1 + etx_offset;
                blocks.push((i + 1, etx_pos));
                i = etx_pos + 1;
            } else {
                i += 1;
            }
        } else {
            i += 1;
        }
    }

    // Parse blocks in parallel
    let records: Vec<HashMap<String, HsvValue>> = blocks
        .par_iter()
        .flat_map(|(start, end)| {
            let block_content: String = chars[*start..*end].iter().collect();
            split_respecting_nesting(&block_content, FS)
                .into_iter()
                .filter_map(|record| {
                    let obj = parse_object(&record, &binaries);
                    if !obj.is_empty() {
                        Some(obj)
                    } else {
                        None
                    }
                })
                .collect::<Vec<_>>()
        })
        .collect();

    HsvDocument { header, records }
}

/// Parse HSV text sequentially (single-threaded).
///
/// Use this when you need deterministic ordering or are parsing small inputs
/// where parallel overhead isn't worth it.
pub fn parse_sequential(text: &str) -> HsvDocument {
    // First, extract binary sections
    let (text, binaries) = extract_binary_sections(text);
    let chars: Vec<char> = text.chars().collect();

    let mut header: Option<HashMap<String, HsvValue>> = None;
    let mut records: Vec<HashMap<String, HsvValue>> = Vec::new();

    let mut i = 0;
    while i < chars.len() {
        // Check for SOH (header start)
        if chars[i] == SOH {
            // Find STX (header end, data start)
            if let Some(stx_offset) = chars[i + 1..].iter().position(|&c| c == STX) {
                let stx_pos = i + 1 + stx_offset;

                // Parse header
                let header_content: String = chars[i + 1..stx_pos].iter().collect();
                header = Some(parse_object(&header_content, &binaries));

                // Find ETX (data end)
                if let Some(etx_offset) = chars[stx_pos + 1..].iter().position(|&c| c == ETX) {
                    let etx_pos = stx_pos + 1 + etx_offset;

                    // Parse data block
                    let block_content: String = chars[stx_pos + 1..etx_pos].iter().collect();
                    for record in split_respecting_nesting(&block_content, FS) {
                        let obj = parse_object(&record, &binaries);
                        if !obj.is_empty() {
                            records.push(obj);
                        }
                    }

                    i = etx_pos + 1;
                    continue;
                }
            }
            i += 1;
        }
        // Check for STX (data block without header)
        else if chars[i] == STX {
            if let Some(etx_offset) = chars[i + 1..].iter().position(|&c| c == ETX) {
                let etx_pos = i + 1 + etx_offset;

                // Parse data block
                let block_content: String = chars[i + 1..etx_pos].iter().collect();
                for record in split_respecting_nesting(&block_content, FS) {
                    let obj = parse_object(&record, &binaries);
                    if !obj.is_empty() {
                        records.push(obj);
                    }
                }

                i = etx_pos + 1;
            } else {
                i += 1;
            }
        } else {
            i += 1;
        }
    }

    HsvDocument { header, records }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_basic() {
        let test = format!("{STX}name{US}Alice{RS}age{US}30{ETX}");
        let result = parse(&test);
        assert_eq!(result.records.len(), 1);
        assert_eq!(
            result.records[0].get("name"),
            Some(&HsvValue::String("Alice".to_string()))
        );
        assert_eq!(
            result.records[0].get("age"),
            Some(&HsvValue::String("30".to_string()))
        );
    }

    #[test]
    fn test_multiple_records() {
        let test = format!("{STX}name{US}Alice{FS}name{US}Bob{ETX}");
        let result = parse(&test);
        assert_eq!(result.records.len(), 2);
    }

    #[test]
    fn test_arrays() {
        let test = format!("{STX}tags{US}a{GS}b{GS}c{ETX}");
        let result = parse(&test);
        assert_eq!(result.records.len(), 1);
        match result.records[0].get("tags") {
            Some(HsvValue::Array(arr)) => {
                assert_eq!(arr.len(), 3);
                assert_eq!(arr[0], HsvValue::String("a".to_string()));
            }
            _ => panic!("Expected array"),
        }
    }

    #[test]
    fn test_header() {
        let test = format!("{SOH}hsv{US}1.0{RS}type{US}users{STX}name{US}Alice{ETX}");
        let result = parse(&test);
        assert!(result.header.is_some());
        let header = result.header.unwrap();
        assert_eq!(header.get("hsv"), Some(&HsvValue::String("1.0".to_string())));
        assert_eq!(
            header.get("type"),
            Some(&HsvValue::String("users".to_string()))
        );
        assert_eq!(result.records.len(), 1);
    }

    #[test]
    fn test_nesting() {
        let test = format!("{STX}user{US}{SO}name{US}Alice{RS}email{US}a@b.com{SI}{ETX}");
        let result = parse(&test);
        assert_eq!(result.records.len(), 1);
        match result.records[0].get("user") {
            Some(HsvValue::Object(obj)) => {
                assert_eq!(obj.get("name"), Some(&HsvValue::String("Alice".to_string())));
                assert_eq!(
                    obj.get("email"),
                    Some(&HsvValue::String("a@b.com".to_string()))
                );
            }
            _ => panic!("Expected nested object"),
        }
    }

    #[test]
    fn test_deep_nesting() {
        let test = format!("{STX}data{US}{SO}level1{US}{SO}level2{US}deep{SI}{SI}{ETX}");
        let result = parse(&test);
        assert_eq!(result.records.len(), 1);
        match result.records[0].get("data") {
            Some(HsvValue::Object(obj)) => match obj.get("level1") {
                Some(HsvValue::Object(inner)) => {
                    assert_eq!(
                        inner.get("level2"),
                        Some(&HsvValue::String("deep".to_string()))
                    );
                }
                _ => panic!("Expected level1 object"),
            },
            _ => panic!("Expected data object"),
        }
    }

    #[test]
    fn test_binary_mode() {
        let binary_data = format!("raw{STX}data{ETX}here");
        let test = format!("{STX}type{US}image{RS}data{US}{DLE}{STX}{binary_data}{DLE}{ETX}{ETX}");
        let result = parse(&test);
        assert_eq!(result.records.len(), 1);
        assert_eq!(
            result.records[0].get("type"),
            Some(&HsvValue::String("image".to_string()))
        );
        assert_eq!(
            result.records[0].get("data"),
            Some(&HsvValue::String(binary_data))
        );
    }

    #[test]
    fn test_binary_with_dle() {
        let binary_data = format!("has{DLE}dle");
        let escaped = binary_data.replace(DLE, &format!("{DLE}{DLE}"));
        let test = format!("{STX}data{US}{DLE}{STX}{escaped}{DLE}{ETX}{ETX}");
        let result = parse(&test);
        assert_eq!(result.records.len(), 1);
        assert_eq!(
            result.records[0].get("data"),
            Some(&HsvValue::String(binary_data))
        );
    }

    #[test]
    fn test_newlines() {
        let test = format!("{STX}text{US}line1\nline2\nline3{ETX}");
        let result = parse(&test);
        assert_eq!(result.records.len(), 1);
        assert_eq!(
            result.records[0].get("text"),
            Some(&HsvValue::String("line1\nline2\nline3".to_string()))
        );
    }

    #[test]
    fn test_quotes() {
        let test = format!("{STX}msg{US}He said \"hello\"{ETX}");
        let result = parse(&test);
        assert_eq!(result.records.len(), 1);
        assert_eq!(
            result.records[0].get("msg"),
            Some(&HsvValue::String("He said \"hello\"".to_string()))
        );
    }

    #[test]
    fn test_mixed_content() {
        let test = format!("ignored{STX}name{US}Alice{ETX}also ignored");
        let result = parse(&test);
        assert_eq!(result.records.len(), 1);
        assert_eq!(
            result.records[0].get("name"),
            Some(&HsvValue::String("Alice".to_string()))
        );
    }

    #[test]
    fn test_multiple_blocks() {
        let test = format!("{STX}a{US}1{ETX}junk{STX}b{US}2{ETX}");
        let result = parse(&test);
        assert_eq!(result.records.len(), 2);
    }

    #[test]
    fn test_nested_array() {
        let test = format!("{STX}user{US}{SO}name{US}Alice{RS}tags{US}admin{GS}user{SI}{ETX}");
        let result = parse(&test);
        assert_eq!(result.records.len(), 1);
        match result.records[0].get("user") {
            Some(HsvValue::Object(obj)) => match obj.get("tags") {
                Some(HsvValue::Array(arr)) => {
                    assert_eq!(arr.len(), 2);
                    assert_eq!(arr[0], HsvValue::String("admin".to_string()));
                    assert_eq!(arr[1], HsvValue::String("user".to_string()));
                }
                _ => panic!("Expected tags array"),
            },
            _ => panic!("Expected user object"),
        }
    }

    #[test]
    fn test_complex() {
        let test = format!(
            "{SOH}hsv{US}1.0{RS}type{US}complex{STX}\
             user{US}{SO}name{US}Alice{RS}tags{US}admin{GS}active{SI}{FS}\
             user{US}{SO}name{US}Bob{RS}tags{US}user{SI}\
             {ETX}"
        );
        let result = parse(&test);

        // Check header
        assert!(result.header.is_some());
        let header = result.header.unwrap();
        assert_eq!(header.get("hsv"), Some(&HsvValue::String("1.0".to_string())));
        assert_eq!(
            header.get("type"),
            Some(&HsvValue::String("complex".to_string()))
        );

        // Check records
        assert_eq!(result.records.len(), 2);
    }

    #[test]
    fn test_sequential_ordering() {
        // Sequential parser should preserve order
        let test = format!(
            "{STX}order{US}1{FS}order{US}2{FS}order{US}3{FS}order{US}4{FS}order{US}5{ETX}"
        );
        let result = parse_sequential(&test);
        assert_eq!(result.records.len(), 5);

        for (i, record) in result.records.iter().enumerate() {
            assert_eq!(
                record.get("order"),
                Some(&HsvValue::String((i + 1).to_string()))
            );
        }
    }
}