// Copyright 2026 Danslav Slavenskoj, Lingenic LLC
// License: CC0 1.0 - Public Domain
// https://creativecommons.org/publicdomain/zero/1.0/
// You may use this code for any purpose without attribution.
//
// Spec: https://hsvfile.com
// Repo: https://github.com/LingenicLLC/HSV
//! # HSV - Hierarchical Separated Values
//!
//! A text-based file format and streaming protocol using ASCII control characters.
//! Unlimited nesting (like JSON). No escaping required. Binary data supported.
//!
//! ## Features
//!
//! - **SOH headers** - Metadata before data blocks
//! - **STX/ETX framing** - Clear block boundaries
//! - **SO/SI nesting** - Unlimited depth structures
//! - **DLE binary mode** - Embed any bytes
//! - **FS/GS/RS/US structure** - Records, arrays, properties, key-value pairs
//!
//! ## Example
//!
//! ```rust
//! use hsv::{parse, HsvValue};
//!
//! // Simple key-value record
//! let data = "\x02name\x1fAlice\x1eage\x1f30\x03";
//! let doc = hsv::parse(data);
//! assert_eq!(doc.records.len(), 1);
//! ```
//!
//! ## Control Characters
//!
//! | Code | Hex | Name | Purpose |
//! |------|------|------|---------|
//! | SOH | 0x01 | Start of Header | Begin header section |
//! | STX | 0x02 | Start of Text | Begin data block |
//! | ETX | 0x03 | End of Text | End data block |
//! | SO | 0x0E | Shift Out | Start nested structure |
//! | SI | 0x0F | Shift In | End nested structure |
//! | DLE | 0x10 | Data Link Escape | Binary mode escape |
//! | FS | 0x1C | File Separator | Record separator |
//! | GS | 0x1D | Group Separator | Array element separator |
//! | RS | 0x1E | Record Separator | Property separator |
//! | US | 0x1F | Unit Separator | Key-value separator |
use rayon::prelude::*;
use std::collections::HashMap;
/// ASCII control characters that make up the HSV format.
///
/// Every constant is a `char` holding the code point given in parentheses.
/// The module is glob-imported right after its definition, so the rest of
/// the file refers to these names without the `chars::` prefix.
pub mod chars {
/// Start of Header (0x01) - begins the header section
pub const SOH: char = '\x01';
/// Start of Text (0x02) - begins a data block
pub const STX: char = '\x02';
/// End of Text (0x03) - ends a data block
pub const ETX: char = '\x03';
/// End of Transmission (0x04)
pub const EOT: char = '\x04';
/// Enquiry (0x05) - request acknowledgment
pub const ENQ: char = '\x05';
/// Acknowledge (0x06) - success
pub const ACK: char = '\x06';
/// Shift Out (0x0E) - starts a nested structure
pub const SO: char = '\x0e';
/// Shift In (0x0F) - ends a nested structure
pub const SI: char = '\x0f';
/// Data Link Escape (0x10) - binary mode escape; `DLE STX` opens and
/// `DLE ETX` closes a binary section, `DLE DLE` is a literal DLE inside one
pub const DLE: char = '\x10';
/// XON, ASCII DC1 (0x11) - resume transmission
pub const XON: char = '\x11';
/// XOFF, ASCII DC3 (0x13) - pause transmission
pub const XOFF: char = '\x13';
/// Negative Acknowledge (0x15) - error
pub const NAK: char = '\x15';
/// Synchronous Idle (0x16) - keepalive
pub const SYN: char = '\x16';
/// Cancel (0x18) - cancel operation
pub const CAN: char = '\x18';
/// File Separator (0x1C) - separates records inside a data block
pub const FS: char = '\x1c';
/// Group Separator (0x1D) - separates array elements inside a value
pub const GS: char = '\x1d';
/// Record Separator (0x1E) - separates properties inside a record or object
pub const RS: char = '\x1e';
/// Unit Separator (0x1F) - separates a key from its value
pub const US: char = '\x1f';
}
use chars::*;
/// A single value found in an HSV record: plain text, a list, or a nested
/// key/value structure.
#[derive(Debug, Clone, PartialEq)]
pub enum HsvValue {
    /// Plain text.
    String(String),
    /// A sequence of values (elements separated by GS).
    Array(Vec<HsvValue>),
    /// A nested key/value structure (wrapped in SO/SI).
    Object(HashMap<String, HsvValue>),
}

impl HsvValue {
    /// Borrows the text if this is a `String` value, otherwise `None`.
    pub fn as_str(&self) -> Option<&str> {
        if let HsvValue::String(text) = self {
            Some(text.as_str())
        } else {
            None
        }
    }

    /// Borrows the elements if this is an `Array` value, otherwise `None`.
    pub fn as_array(&self) -> Option<&Vec<HsvValue>> {
        if let HsvValue::Array(items) = self {
            Some(items)
        } else {
            None
        }
    }

    /// Borrows the map if this is an `Object` value, otherwise `None`.
    pub fn as_object(&self) -> Option<&HashMap<String, HsvValue>> {
        if let HsvValue::Object(fields) = self {
            Some(fields)
        } else {
            None
        }
    }

    /// Reports whether this value is the `String` variant.
    pub fn is_string(&self) -> bool {
        self.as_str().is_some()
    }

    /// Reports whether this value is the `Array` variant.
    pub fn is_array(&self) -> bool {
        self.as_array().is_some()
    }

    /// Reports whether this value is the `Object` variant.
    pub fn is_object(&self) -> bool {
        self.as_object().is_some()
    }
}
/// Everything extracted from one HSV input: the optional header plus the
/// records collected from its data blocks.
#[derive(Debug, Clone, Default)]
pub struct HsvDocument {
    /// Header properties (the content between SOH and STX), if a header was found.
    pub header: Option<HashMap<String, HsvValue>>,
    /// Records gathered from the data blocks.
    pub records: Vec<HashMap<String, HsvValue>>,
}

impl HsvDocument {
    /// Builds a document with no header and no records.
    pub fn new() -> Self {
        Self::default()
    }

    /// Reports whether a header is present.
    pub fn has_header(&self) -> bool {
        self.header.is_some()
    }

    /// Reports whether the document holds zero records.
    pub fn is_empty(&self) -> bool {
        self.records.is_empty()
    }

    /// Number of records in the document.
    pub fn len(&self) -> usize {
        self.records.len()
    }
}
/// Pull every `DLE STX ... DLE ETX` binary section out of `text`.
///
/// Each complete section is replaced in the returned text by a placeholder of
/// the form `\0BINARY<n>\0` (numbered from 0 in order of appearance), and the
/// returned map associates that placeholder with the section's payload after
/// `DLE DLE` has been unescaped to a single `DLE`. Because the payload is
/// removed from the text, framing and separator characters inside it cannot be
/// mistaken for structure by later parsing stages.
///
/// A `DLE STX` that is never closed by `DLE ETX` is not a binary section; its
/// characters are copied through unchanged.
///
/// The payload is unescaped exactly once. Unescaping during the scan and again
/// afterwards would turn two consecutive escaped DLEs (`DLE DLE DLE DLE`) into
/// one DLE instead of two.
///
/// NOTE(review): input that already contains literal text of the form
/// `\0BINARY<n>\0` is indistinguishable from a generated placeholder.
fn extract_binary_sections(text: &str) -> (String, HashMap<String, String>) {
    let symbols: Vec<char> = text.chars().collect();
    let mut result = String::with_capacity(text.len());
    let mut binaries = HashMap::new();
    let mut i = 0;
    while i < symbols.len() {
        let opens_section = symbols[i] == DLE && symbols.get(i + 1) == Some(&STX);
        if opens_section {
            if let Some(close) = find_binary_end(&symbols, i + 2) {
                // Raw payload, escapes still in place; unescaped once below.
                let raw: String = symbols[i + 2..close].iter().collect();
                // Every placeholder is unique, so the map size is the running count.
                let placeholder = format!("\0BINARY{}\0", binaries.len());
                result.push_str(&placeholder);
                binaries.insert(placeholder, unescape_binary(&raw));
                // Resume after the closing DLE+ETX pair.
                i = close + 2;
                continue;
            }
        }
        // Ordinary character, or an opener without a terminator.
        result.push(symbols[i]);
        i += 1;
    }
    (result, binaries)
}

/// Index of the `DLE` that starts the closing `DLE ETX` pair, scanning from
/// `from` and stepping over `DLE DLE` escapes; `None` if the section never closes.
fn find_binary_end(symbols: &[char], from: usize) -> Option<usize> {
    let mut j = from;
    // A terminator needs two characters, so the last index can never start one.
    while j + 1 < symbols.len() {
        if symbols[j] == DLE {
            let next = symbols[j + 1];
            if next == ETX {
                return Some(j);
            }
            if next == DLE {
                // Escaped DLE: skip both so the second is not read as a new escape.
                j += 2;
                continue;
            }
        }
        j += 1;
    }
    None
}
/// Collapse every `DLE DLE` pair in `data` into a single `DLE`.
///
/// Pairs are matched left to right; a `DLE` that has no `DLE` directly after
/// it is copied through unchanged.
fn unescape_binary(data: &str) -> String {
    let mut unescaped = String::new();
    let mut stream = data.chars().peekable();
    while let Some(current) = stream.next() {
        if current == DLE && stream.peek() == Some(&DLE) {
            // Second half of an escape pair: consume it without emitting it.
            stream.next();
        }
        unescaped.push(current);
    }
    unescaped
}
/// Substitute every binary placeholder in `value` with its payload.
///
/// `binaries` maps placeholders to payloads. Placeholders are expected to be
/// NUL-delimited (`\0...\0` with no NUL in between), which is the shape
/// `\0BINARY<n>\0` used for binary sections.
///
/// The substitution is one left-to-right pass over `value`, and restored
/// payloads are never scanned again. Replacing one placeholder at a time over
/// the whole string would let a payload that happens to contain another
/// placeholder's text be rewritten, with the outcome depending on `HashMap`
/// iteration order.
///
/// Text that merely contains NUL characters, or a NUL-delimited run that is
/// not a key of `binaries`, is copied through unchanged.
fn restore_binaries(value: &str, binaries: &HashMap<String, String>) -> String {
    // Fast path: nothing can match without a NUL or without any placeholders.
    if binaries.is_empty() || !value.contains('\0') {
        return value.to_string();
    }
    let mut result = String::with_capacity(value.len());
    let mut rest = value;
    while let Some(start) = rest.find('\0') {
        result.push_str(&rest[..start]);
        let candidate = &rest[start..];
        // A placeholder runs from this NUL through the next one, inclusive.
        let matched = candidate[1..].find('\0').and_then(|close| {
            let end = close + 2; // skip the leading NUL, include the closing NUL
            binaries.get(&candidate[..end]).map(|data| (data, end))
        });
        match matched {
            Some((data, end)) => {
                result.push_str(data);
                rest = &candidate[end..];
            }
            None => {
                // Not a placeholder: keep this NUL and retry from the next
                // character, since the following NUL may open a real one.
                result.push('\0');
                rest = &candidate[1..];
            }
        }
    }
    result.push_str(rest);
    result
}
/// Split `text` on `sep`, ignoring separators that sit inside SO/SI nesting.
///
/// SO raises the nesting depth and SI lowers it; both are kept in the output
/// pieces. A `sep` is a split point only at depth zero. The separator itself
/// is dropped from the output.
///
/// An SI with no matching SO leaves the depth at zero rather than driving it
/// negative. A negative depth would silently disable every later top-level
/// split until the next SO appeared.
///
/// Returns an empty vector for empty input. Otherwise the result has one more
/// element than the number of top-level separators, so a trailing separator
/// yields a trailing empty piece.
fn split_respecting_nesting(text: &str, sep: char) -> Vec<String> {
    let mut parts = Vec::new();
    let mut current = String::new();
    let mut depth: usize = 0;
    for c in text.chars() {
        match c {
            c if c == SO => {
                depth += 1;
                current.push(c);
            }
            c if c == SI => {
                // Stray SI: stay at the top level.
                depth = depth.saturating_sub(1);
                current.push(c);
            }
            c if c == sep && depth == 0 => {
                parts.push(std::mem::take(&mut current));
            }
            _ => current.push(c),
        }
    }
    // Emit the final piece unless the input was empty.
    if !current.is_empty() || !parts.is_empty() {
        parts.push(current);
    }
    parts
}
/// Parse one property value into an [`HsvValue`].
///
/// * A value that is a single SO...SI group becomes an `Object` built from the
///   text between the delimiters.
/// * Otherwise, a value with at least one GS outside any nesting becomes an
///   `Array` whose elements are parsed recursively.
/// * Anything else becomes a `String`, with binary placeholders replaced by
///   their payloads from `binaries`.
///
/// Structure is detected on the placeholder form of the text, and payloads are
/// restored only in the final `String`. Restoring first would let SO/SI/GS
/// bytes inside a binary payload be misread as nesting or array separators.
///
/// Every recursive call receives strictly shorter input. A GS that exists only
/// inside unbalanced nesting produces no top-level split, and the value is
/// then kept as a string instead of being re-parsed unchanged.
fn parse_value(value: &str, binaries: &HashMap<String, String>) -> HsvValue {
    if is_single_nested_group(value) {
        let inner = &value[SO.len_utf8()..value.len() - SI.len_utf8()];
        return HsvValue::Object(parse_object(inner, binaries));
    }
    if value.contains(GS) {
        let parts = split_respecting_nesting(value, GS);
        // One part (or none) means there was no top-level GS to split on.
        if parts.len() > 1 {
            let items = parts
                .iter()
                .map(|part| parse_value(part, binaries))
                .collect();
            return HsvValue::Array(items);
        }
    }
    HsvValue::String(restore_binaries(value, binaries))
}

/// True when `value` starts with SO, ends with SI, and the opening SO is not
/// closed before the final character. This distinguishes one object from a
/// sequence such as `SO..SI GS SO..SI`.
fn is_single_nested_group(value: &str) -> bool {
    if !(value.starts_with(SO) && value.ends_with(SI)) {
        return false;
    }
    let last = value.len() - SI.len_utf8();
    let mut depth: usize = 0;
    for (index, c) in value.char_indices() {
        if c == SO {
            depth += 1;
        } else if c == SI {
            depth = depth.saturating_sub(1);
            if depth == 0 && index != last {
                // The leading group closed early; more content follows it.
                return false;
            }
        }
    }
    true
}
/// Build a key/value map from `content`, a list of RS-separated properties.
///
/// Each property is cut at its top-level US characters: the first piece is the
/// key, and the remaining pieces (rejoined with US) form the raw value handed
/// to `parse_value`. Properties without any top-level US are skipped. When a
/// key occurs more than once, the last occurrence wins.
fn parse_object(content: &str, binaries: &HashMap<String, String>) -> HashMap<String, HsvValue> {
    let unit_separator = US.to_string();
    split_respecting_nesting(content, RS)
        .iter()
        .filter_map(|property| {
            let pieces = split_respecting_nesting(property, US);
            let (key, value_pieces) = pieces.split_first()?;
            if value_pieces.is_empty() {
                // No key/value separator in this property.
                return None;
            }
            let raw_value = value_pieces.join(unit_separator.as_str());
            Some((key.clone(), parse_value(&raw_value, binaries)))
        })
        .collect()
}
/// Parse HSV text into an [`HsvDocument`], parsing data blocks in parallel.
///
/// Binary sections are first replaced by placeholders. The text is then
/// scanned for `SOH ... STX ... ETX` (header plus data block) and
/// `STX ... ETX` (data block only) sequences; anything outside them is
/// ignored. The spans of all data blocks are collected and handed to rayon,
/// where each block is split on FS into records. Records that yield no
/// properties are dropped.
///
/// # Arguments
///
/// * `text` - The HSV-encoded text to parse
///
/// # Returns
///
/// An `HsvDocument` holding the header (if any; when several are present the
/// last one wins) and every non-empty record.
///
/// # Example
///
/// ```rust
/// use hsv::parse;
///
/// let text = "\x02name\x1fAlice\x1eage\x1f30\x03";
/// let doc = parse(text);
/// assert_eq!(doc.records.len(), 1);
/// ```
pub fn parse(text: &str) -> HsvDocument {
    let (text, binaries) = extract_binary_sections(text);
    let symbols: Vec<char> = text.chars().collect();

    // Absolute index of the first `wanted` at or after `from`.
    let locate = |from: usize, wanted: char| -> Option<usize> {
        symbols[from..]
            .iter()
            .position(|&c| c == wanted)
            .map(|offset| from + offset)
    };

    let mut header: Option<HashMap<String, HsvValue>> = None;
    // (start, end) character spans of the data blocks, end exclusive.
    let mut blocks: Vec<(usize, usize)> = Vec::new();
    let mut cursor = 0;
    while cursor < symbols.len() {
        let marker = symbols[cursor];
        // Where a data block would begin, if this position introduces one.
        let mut data_start = None;
        if marker == SOH {
            if let Some(stx) = locate(cursor + 1, STX) {
                let raw_header: String = symbols[cursor + 1..stx].iter().collect();
                header = Some(parse_object(&raw_header, &binaries));
                data_start = Some(stx + 1);
            }
        } else if marker == STX {
            data_start = Some(cursor + 1);
        }

        let span = data_start.and_then(|start| locate(start, ETX).map(|end| (start, end)));
        match span {
            Some((start, end)) => {
                blocks.push((start, end));
                // Continue right after the closing ETX.
                cursor = end + 1;
            }
            // No block here (or it never closes): move on by one character.
            None => cursor += 1,
        }
    }

    let records: Vec<HashMap<String, HsvValue>> = blocks
        .par_iter()
        .flat_map(|&(start, end)| {
            let body: String = symbols[start..end].iter().collect();
            split_respecting_nesting(&body, FS)
                .iter()
                .map(|record| parse_object(record, &binaries))
                .filter(|properties| !properties.is_empty())
                .collect::<Vec<_>>()
        })
        .collect();

    HsvDocument { header, records }
}
/// Parse HSV text into an [`HsvDocument`] on the calling thread.
///
/// Recognises the same `SOH ... STX ... ETX` and `STX ... ETX` sequences as
/// the parallel parser, but parses each data block as soon as it is found, so
/// records are appended in the order they appear in the input. Prefer this for
/// small inputs where spreading the work across threads is not worthwhile.
pub fn parse_sequential(text: &str) -> HsvDocument {
    let (text, binaries) = extract_binary_sections(text);
    let symbols: Vec<char> = text.chars().collect();

    // Absolute index of the first `wanted` at or after `from`.
    let locate = |from: usize, wanted: char| -> Option<usize> {
        symbols[from..]
            .iter()
            .position(|&c| c == wanted)
            .map(|offset| from + offset)
    };

    let mut header: Option<HashMap<String, HsvValue>> = None;
    let mut records: Vec<HashMap<String, HsvValue>> = Vec::new();
    let mut cursor = 0;
    while cursor < symbols.len() {
        let marker = symbols[cursor];
        // Where a data block would begin, if this position introduces one.
        let mut data_start = None;
        if marker == SOH {
            if let Some(stx) = locate(cursor + 1, STX) {
                let raw_header: String = symbols[cursor + 1..stx].iter().collect();
                header = Some(parse_object(&raw_header, &binaries));
                data_start = Some(stx + 1);
            }
        } else if marker == STX {
            data_start = Some(cursor + 1);
        }

        let span = data_start.and_then(|start| locate(start, ETX).map(|end| (start, end)));
        match span {
            Some((start, end)) => {
                let body: String = symbols[start..end].iter().collect();
                // Keep only records that produced at least one property.
                records.extend(
                    split_respecting_nesting(&body, FS)
                        .iter()
                        .map(|record| parse_object(record, &binaries))
                        .filter(|properties| !properties.is_empty()),
                );
                // Continue right after the closing ETX.
                cursor = end + 1;
            }
            // No block here (or it never closes): move on by one character.
            None => cursor += 1,
        }
    }

    HsvDocument { header, records }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the `HsvValue::String` a test expects to find.
    fn text(value: &str) -> HsvValue {
        HsvValue::String(value.to_string())
    }

    /// A single record with two plain properties.
    #[test]
    fn test_basic() {
        let input = format!("{STX}name{US}Alice{RS}age{US}30{ETX}");
        let doc = parse(&input);
        assert_eq!(doc.records.len(), 1);
        let record = &doc.records[0];
        assert_eq!(record.get("name"), Some(&text("Alice")));
        assert_eq!(record.get("age"), Some(&text("30")));
    }

    /// FS splits one data block into several records.
    #[test]
    fn test_multiple_records() {
        let input = format!("{STX}name{US}Alice{FS}name{US}Bob{ETX}");
        let doc = parse(&input);
        assert_eq!(doc.records.len(), 2);
    }

    /// GS turns a value into an array.
    #[test]
    fn test_arrays() {
        let input = format!("{STX}tags{US}a{GS}b{GS}c{ETX}");
        let doc = parse(&input);
        assert_eq!(doc.records.len(), 1);
        let tags = doc.records[0]
            .get("tags")
            .and_then(|value| value.as_array())
            .expect("Expected array");
        assert_eq!(tags.len(), 3);
        assert_eq!(tags[0], text("a"));
    }

    /// Content between SOH and STX is parsed as the header.
    #[test]
    fn test_header() {
        let input = format!("{SOH}hsv{US}1.0{RS}type{US}users{STX}name{US}Alice{ETX}");
        let doc = parse(&input);
        assert!(doc.header.is_some());
        let header = doc.header.as_ref().unwrap();
        assert_eq!(header.get("hsv"), Some(&text("1.0")));
        assert_eq!(header.get("type"), Some(&text("users")));
        assert_eq!(doc.records.len(), 1);
    }

    /// SO/SI wraps a nested object.
    #[test]
    fn test_nesting() {
        let input = format!("{STX}user{US}{SO}name{US}Alice{RS}email{US}a@b.com{SI}{ETX}");
        let doc = parse(&input);
        assert_eq!(doc.records.len(), 1);
        let user = doc.records[0]
            .get("user")
            .and_then(|value| value.as_object())
            .expect("Expected nested object");
        assert_eq!(user.get("name"), Some(&text("Alice")));
        assert_eq!(user.get("email"), Some(&text("a@b.com")));
    }

    /// Objects can nest inside objects.
    #[test]
    fn test_deep_nesting() {
        let input = format!("{STX}data{US}{SO}level1{US}{SO}level2{US}deep{SI}{SI}{ETX}");
        let doc = parse(&input);
        assert_eq!(doc.records.len(), 1);
        let data = doc.records[0]
            .get("data")
            .and_then(|value| value.as_object())
            .expect("Expected data object");
        let level1 = data
            .get("level1")
            .and_then(|value| value.as_object())
            .expect("Expected level1 object");
        assert_eq!(level1.get("level2"), Some(&text("deep")));
    }

    /// Framing characters inside a binary section are data, not structure.
    #[test]
    fn test_binary_mode() {
        let binary_data = format!("raw{STX}data{ETX}here");
        let input = format!("{STX}type{US}image{RS}data{US}{DLE}{STX}{binary_data}{DLE}{ETX}{ETX}");
        let doc = parse(&input);
        assert_eq!(doc.records.len(), 1);
        let record = &doc.records[0];
        assert_eq!(record.get("type"), Some(&text("image")));
        assert_eq!(record.get("data"), Some(&text(&binary_data)));
    }

    /// A DLE inside binary data is written as DLE+DLE and read back as one DLE.
    #[test]
    fn test_binary_with_dle() {
        let binary_data = format!("has{DLE}dle");
        let escaped = binary_data.replace(DLE, &format!("{DLE}{DLE}"));
        let input = format!("{STX}data{US}{DLE}{STX}{escaped}{DLE}{ETX}{ETX}");
        let doc = parse(&input);
        assert_eq!(doc.records.len(), 1);
        assert_eq!(doc.records[0].get("data"), Some(&text(&binary_data)));
    }

    /// Newlines are ordinary value characters.
    #[test]
    fn test_newlines() {
        let input = format!("{STX}text{US}line1\nline2\nline3{ETX}");
        let doc = parse(&input);
        assert_eq!(doc.records.len(), 1);
        assert_eq!(
            doc.records[0].get("text"),
            Some(&text("line1\nline2\nline3"))
        );
    }

    /// Double quotes are ordinary value characters.
    #[test]
    fn test_quotes() {
        let input = format!("{STX}msg{US}He said \"hello\"{ETX}");
        let doc = parse(&input);
        assert_eq!(doc.records.len(), 1);
        assert_eq!(
            doc.records[0].get("msg"),
            Some(&text("He said \"hello\""))
        );
    }

    /// Text outside STX/ETX is ignored.
    #[test]
    fn test_mixed_content() {
        let input = format!("ignored{STX}name{US}Alice{ETX}also ignored");
        let doc = parse(&input);
        assert_eq!(doc.records.len(), 1);
        assert_eq!(doc.records[0].get("name"), Some(&text("Alice")));
    }

    /// Each STX/ETX pair contributes its own records.
    #[test]
    fn test_multiple_blocks() {
        let input = format!("{STX}a{US}1{ETX}junk{STX}b{US}2{ETX}");
        let doc = parse(&input);
        assert_eq!(doc.records.len(), 2);
    }

    /// Arrays can appear inside nested objects.
    #[test]
    fn test_nested_array() {
        let input = format!("{STX}user{US}{SO}name{US}Alice{RS}tags{US}admin{GS}user{SI}{ETX}");
        let doc = parse(&input);
        assert_eq!(doc.records.len(), 1);
        let user = doc.records[0]
            .get("user")
            .and_then(|value| value.as_object())
            .expect("Expected user object");
        let tags = user
            .get("tags")
            .and_then(|value| value.as_array())
            .expect("Expected tags array");
        assert_eq!(tags.len(), 2);
        assert_eq!(tags[0], text("admin"));
        assert_eq!(tags[1], text("user"));
    }

    /// Header, multiple records, nesting and arrays together.
    #[test]
    fn test_complex() {
        let input = format!(
            "{SOH}hsv{US}1.0{RS}type{US}complex{STX}\
user{US}{SO}name{US}Alice{RS}tags{US}admin{GS}active{SI}{FS}\
user{US}{SO}name{US}Bob{RS}tags{US}user{SI}\
{ETX}"
        );
        let doc = parse(&input);
        // Header
        assert!(doc.header.is_some());
        let header = doc.header.as_ref().unwrap();
        assert_eq!(header.get("hsv"), Some(&text("1.0")));
        assert_eq!(header.get("type"), Some(&text("complex")));
        // Records
        assert_eq!(doc.records.len(), 2);
    }

    /// The sequential parser yields records in input order.
    #[test]
    fn test_sequential_ordering() {
        let input = format!(
            "{STX}order{US}1{FS}order{US}2{FS}order{US}3{FS}order{US}4{FS}order{US}5{ETX}"
        );
        let doc = parse_sequential(&input);
        assert_eq!(doc.records.len(), 5);
        for (record, position) in doc.records.iter().zip(1..) {
            assert_eq!(record.get("order"), Some(&text(&position.to_string())));
        }
    }
}
// Copyright 2026 Danslav Slavenskoj, Lingenic LLC
// License: CC0 1.0 - Public Domain
// https://creativecommons.org/publicdomain/zero/1.0/
// You may use this code for any purpose without attribution.
//
// Spec: https://hsvfile.com
// Repo: https://github.com/LingenicLLC/HSV
//! # HSV - Hierarchical Separated Values
//!
//! A text-based file format and streaming protocol using ASCII control characters.
//! Unlimited nesting (like JSON). No escaping required. Binary data supported.
//!
//! ## Features
//!
//! - **SOH headers** - Metadata before data blocks
//! - **STX/ETX framing** - Clear block boundaries
//! - **SO/SI nesting** - Unlimited depth structures
//! - **DLE binary mode** - Embed any bytes
//! - **FS/GS/RS/US structure** - Records, arrays, properties, key-value pairs
//!
//! ## Example
//!
//! ```rust
//! use hsv::{parse, HsvValue};
//!
//! // Simple key-value record
//! let data = "\x02name\x1fAlice\x1eage\x1f30\x03";
//! let doc = hsv::parse(data);
//! assert_eq!(doc.records.len(), 1);
//! ```
//!
//! ## Control Characters
//!
//! | Code | Hex | Name | Purpose |
//! |------|------|------|---------|
//! | SOH | 0x01 | Start of Header | Begin header section |
//! | STX | 0x02 | Start of Text | Begin data block |
//! | ETX | 0x03 | End of Text | End data block |
//! | SO | 0x0E | Shift Out | Start nested structure |
//! | SI | 0x0F | Shift In | End nested structure |
//! | DLE | 0x10 | Data Link Escape | Binary mode escape |
//! | FS | 0x1C | File Separator | Record separator |
//! | GS | 0x1D | Group Separator | Array element separator |
//! | RS | 0x1E | Record Separator | Property separator |
//! | US | 0x1F | Unit Separator | Key-value separator |
use *;
use HashMap;
/// Control characters used in HSV format
use *;
/// Represents an HSV value - can be a string, array, or nested object
/// Result of parsing HSV text
/// Extract DLE+STX...DLE+ETX binary sections and replace with placeholders
/// Handle DLE escaping: DLE+DLE -> DLE
/// Replace binary placeholders with actual binary data
/// Split by separator, but respect SO/SI nesting depth
/// Parse a value, handling arrays (GS) and nested structures (SO/SI)
/// Parse an object from RS-separated properties
/// Parse HSV text into structured data.
///
/// This function processes HSV-encoded text and returns an `HsvDocument`
/// containing any header and all records found in the text.
///
/// # Arguments
///
/// * `text` - The HSV-encoded text to parse
///
/// # Returns
///
/// An `HsvDocument` containing the parsed header (if present) and records.
///
/// # Example
///
/// ```rust
/// use hsv::parse;
///
/// let text = "\x02name\x1fAlice\x1eage\x1f30\x03";
/// let doc = parse(text);
/// assert_eq!(doc.records.len(), 1);
/// ```
/// Parse HSV text sequentially (single-threaded).
///
/// Use this when you need deterministic ordering or are parsing small inputs
/// where parallel overhead isn't worth it.