diff --git a/tachyon/compute/src/codegen.rs b/tachyon/compute/src/codegen.rs index 370ffcc..a883a01 100644 --- a/tachyon/compute/src/codegen.rs +++ b/tachyon/compute/src/codegen.rs @@ -135,6 +135,11 @@ impl CodeGen for Expr { var.to_string() } Expr::Literal(l) => { + if matches!(l, Literal::Str(_)) { + return Err(TypeError::Unsupported( + "String literals are not yet supported in GPU codegen".to_string(), + )); + } let value = match l { Literal::I8(i) => format!("{}", i), Literal::I16(i) => format!("{}", i), @@ -151,7 +156,7 @@ impl CodeGen for Expr { Literal::F32(f) => format!("{}f", float_literal_to_str(*f)), Literal::F64(f) => float_literal_to_str(*f).to_string(), Literal::Bool(b) => (if *b { "true" } else { "false" }).to_string(), - Literal::Str(s) => format!("\"{}\"", escape_c_string(s)), + Literal::Str(_) => unreachable!(), }; let var = code_block.next_var(); let ty_c = result_type.c_type(); @@ -203,15 +208,188 @@ impl CodeGen for Expr { } } Expr::Nary { op: _, args: _ } => unimplemented!(), - Expr::Call { name, args } => { - let mut arg_strs = Vec::with_capacity(args.len()); - for a in args { - arg_strs.push(a.build_nvrtc_code::(schema, code_block)?); + Expr::Call { name, args } => match name.as_str() { + "length" => { + if args.len() != 1 { + return Err(TypeError::Unsupported("length arity".into())); + } + let (col_idx, col_type, col_name) = match &args[0] { + Expr::Column(col_name) => { + let (idx, dt) = schema + .lookup(col_name) + .copied() + .ok_or_else(|| TypeError::UnknownColumn(col_name.clone()))?; + (idx, dt, col_name.clone()) + } + _ => { + return Err(TypeError::Unsupported( + "length currently requires a string column argument".into(), + )); + } + }; + if col_type != DataType::Str { + return Err(TypeError::Unsupported("length expects string column".into())); + } + let arg_var = + code_block.add_load_column::(&col_name, col_idx, &col_type).to_string(); + let var = code_block.next_var(); + code_block.add_code(&format!( + "\tUInt32 {} = string_ops::length({}, input[{}]);\n", + var, arg_var, col_idx + )); + var } - let var = code_block.next_var(); - code_block.add_code(&format!("{}({})", name, arg_strs.join(", "))); - var - } + "lower" | "lower_case" => { + if args.len() != 1 { + return Err(TypeError::Unsupported("lower arity".into())); + } + let (col_idx, col_type, col_name) = match &args[0] { + Expr::Column(col_name) => { + let (idx, dt) = schema + .lookup(col_name) + .copied() + .ok_or_else(|| TypeError::UnknownColumn(col_name.clone()))?; + (idx, dt, col_name.clone()) + } + _ => { + return Err(TypeError::Unsupported( + "lower currently requires a string column argument".into(), + )); + } + }; + if col_type != DataType::Str { + return Err(TypeError::Unsupported("lower expects string column".into())); + } + let arg_var = + code_block.add_load_column::(&col_name, col_idx, &col_type).to_string(); + let var = code_block.next_var(); + code_block.add_code(&format!( + "\tString {} = string_ops::lower({}, input[{}], output[0], row_idx);\n", + var, arg_var, col_idx + )); + var + } + "upper" | "upper_case" => { + if args.len() != 1 { + return Err(TypeError::Unsupported("upper arity".into())); + } + let (col_idx, col_type, col_name) = match &args[0] { + Expr::Column(col_name) => { + let (idx, dt) = schema + .lookup(col_name) + .copied() + .ok_or_else(|| TypeError::UnknownColumn(col_name.clone()))?; + (idx, dt, col_name.clone()) + } + _ => { + return Err(TypeError::Unsupported( + "upper currently requires a string column argument".into(), + )); + } + }; + if col_type != DataType::Str { + return Err(TypeError::Unsupported("upper expects string column".into())); + } + let arg_var = + code_block.add_load_column::(&col_name, col_idx, &col_type).to_string(); + let var = code_block.next_var(); + code_block.add_code(&format!( + "\tString {} = string_ops::upper({}, input[{}], output[0], row_idx);\n", + var, arg_var, col_idx + )); + var + } + "substring" => { + if args.len() != 3 { + return Err(TypeError::Unsupported("substring arity".into())); + } + let (col_idx, col_type, col_name) = match &args[0] { + Expr::Column(col_name) => { + let (idx, dt) = schema + .lookup(col_name) + .copied() + .ok_or_else(|| TypeError::UnknownColumn(col_name.clone()))?; + (idx, dt, col_name.clone()) + } + _ => { + return Err(TypeError::Unsupported( + "substring currently requires first argument as string column" + .into(), + )); + } + }; + if col_type != DataType::Str { + return Err(TypeError::Unsupported( + "substring expects first argument string column".into(), + )); + } + let start_var = args[1].build_nvrtc_code::(schema, code_block)?; + let len_var = args[2].build_nvrtc_code::(schema, code_block)?; + let arg_var = + code_block.add_load_column::(&col_name, col_idx, &col_type).to_string(); + let var = code_block.next_var(); + code_block.add_code(&format!( + "\tString {};\n\t{}.valid = {}.valid & {}.valid & {}.valid;\n\tif ({}.valid) {{\n\t\t{}.value = string_ops::substring({}, (int32_t)({}.value), (int32_t)({}.value), input[{}], output[0], row_idx).value;\n\t}}\n", + var, var, arg_var, start_var, len_var, var, var, arg_var, start_var, len_var, col_idx + )); + var + } + "concat" => { + if args.len() != 2 { + return Err(TypeError::Unsupported("concat arity".into())); + } + let (l_idx, l_type, l_name) = match &args[0] { + Expr::Column(col_name) => { + let (idx, dt) = schema + .lookup(col_name) + .copied() + .ok_or_else(|| TypeError::UnknownColumn(col_name.clone()))?; + (idx, dt, col_name.clone()) + } + _ => { + return Err(TypeError::Unsupported( + "concat currently requires string column arguments".into(), + )); + } + }; + let (r_idx, r_type, r_name) = match &args[1] { + Expr::Column(col_name) => { + let (idx, dt) = schema + .lookup(col_name) + .copied() + .ok_or_else(|| TypeError::UnknownColumn(col_name.clone()))?; + (idx, dt, col_name.clone()) + } + _ => { + return Err(TypeError::Unsupported( + "concat currently requires string column arguments".into(), + )); + } + }; + if l_type != DataType::Str || r_type != DataType::Str { + return Err(TypeError::Unsupported("concat expects string columns".into())); + } + let l_var = + code_block.add_load_column::(&l_name, l_idx, &l_type).to_string(); + let r_var = + code_block.add_load_column::(&r_name, r_idx, &r_type).to_string(); + let var = code_block.next_var(); + code_block.add_code(&format!( + "\tString {} = string_ops::concat({}, input[{}], {}, input[{}], output[0], row_idx);\n", + var, l_var, l_idx, r_var, r_idx + )); + var + } + _ => { + let mut arg_strs = Vec::with_capacity(args.len()); + for a in args { + arg_strs.push(a.build_nvrtc_code::(schema, code_block)?); + } + let var = code_block.next_var(); + code_block.add_code(&format!("{}({})", name, arg_strs.join(", "))); + var + } + }, Expr::Cast { expr, to } => { let e_var = expr.build_nvrtc_code::(schema, code_block)?; let from = expr.infer_type(schema)?; @@ -272,9 +450,6 @@ fn op_kernel_fn(op: Operator) -> String { kernel_fn.to_string() } -fn escape_c_string(s: &str) -> String { - s.replace('"', "\\\"") -} /// Formats floating-point literals for CUDA code generation. pub(crate) fn float_literal_to_str + Copy + PartialEq>(f: T) -> String { let f64_val = f.into(); diff --git a/tachyon/compute/src/column.rs b/tachyon/compute/src/column.rs index 87984d8..3c96082 100644 --- a/tachyon/compute/src/column.rs +++ b/tachyon/compute/src/column.rs @@ -60,6 +60,8 @@ pub struct Column { pub data_type: DataType, /// Type-erased values container. pub values: Arc, + /// Backing buffer for encoded UTF-8 bytes when `data_type` is [`DataType::Str`]. + pub string_buffer: Option>>, /// Null bitmap where `1` indicates valid and `0` indicates null. pub null_bits: Option>, } @@ -88,7 +90,27 @@ impl Column { pub fn new( name: &str, values: Arc, null_bits: Option>, ) -> Self { - Self { name: name.to_string(), data_type: values.data_type(), values, null_bits } + Self { + name: name.to_string(), + data_type: values.data_type(), + values, + string_buffer: None, + null_bits, + } + } + + /// Creates a string column from pre-encoded string views and string buffer. + pub fn new_string( + name: &str, views: Arc>, string_buffer: Vec, + null_bits: Option>, + ) -> Self { + Self { + name: name.to_string(), + data_type: DataType::Str, + values: views, + string_buffer: Some(Arc::new(string_buffer)), + null_bits, + } } /// Number of rows in the column. @@ -129,7 +151,16 @@ impl Column { DataType::F32 => from_gpu_column!(name, f32, data_type, column), DataType::F64 => from_gpu_column!(name, f64, data_type, column), DataType::Bool => from_gpu_column!(name, bool, data_type, column), - _ => todo!(), + DataType::Str => { + let views = column.host_data::()?; + let buffer = column.host_string_buffer()?.unwrap_or_default(); + Self::new_string( + name, + Arc::new(VecArray { data: views, datatype: data_type }), + buffer, + column.host_bitmap()?.map(|bitmap| BitVector::new(bitmap, column.len())), + ) + } }; Ok(col) } @@ -150,7 +181,20 @@ impl Column { DataType::F32 => to_gpu_column!(self, f32), DataType::F64 => to_gpu_column!(self, f64), DataType::Bool => to_gpu_column!(self, bool), - _ => Err(format!("Unsupported data type: {:?}", self.data_type).into()), + DataType::Str => { + let views = self + .data_as_slice::() + .ok_or("Failed to cast to StringView")?; + let buffer = self + .string_buffer + .as_ref() + .ok_or("String column requires a separate string buffer")?; + gpu_column::Column::new_string( + views, + buffer.as_slice(), + self.null_bits.as_ref().map(|bits| bits.as_slice()), + ) + } } } @@ -158,4 +202,128 @@ impl Column { pub fn null_bits_as_slice(&self) -> Option<&BitVector> { self.null_bits.as_ref() } + + /// Returns encoded UTF-8 buffer for string columns. + pub fn string_buffer_as_slice(&self) -> Option<&[u8]> { + self.string_buffer.as_ref().map(|b| b.as_slice()) + } +} + +#[cfg(test)] +mod tests { + use gpu::column::{ + STRING_INLINE_DATA_BYTES, STRING_INLINE_PREFIX_BYTES, STRING_INLINE_TOTAL_BYTES, + }; + + use super::*; + + fn pack_u32_prefix(bytes: &[u8]) -> u32 { + let mut prefix = 0u32; + let take = bytes.len().min(STRING_INLINE_PREFIX_BYTES); + for (i, b) in bytes.iter().take(take).enumerate() { + prefix |= (*b as u32) << (i * 8); + } + prefix + } + + fn encode_string_view( + s: &str, buffer: &mut Vec, + ) -> Result> { + let bytes = s.as_bytes(); + let size = u32::try_from(bytes.len()).map_err(|_| "String length exceeds u32")?; + let prefix = pack_u32_prefix(bytes); + if bytes.len() <= STRING_INLINE_TOTAL_BYTES { + let mut payload = 0u64; + for (i, b) in bytes + .iter() + .enumerate() + .skip(STRING_INLINE_PREFIX_BYTES) + .take(STRING_INLINE_DATA_BYTES) + { + payload |= (*b as u64) << ((i - STRING_INLINE_PREFIX_BYTES) * 8); + } + Ok(gpu_column::StringView { size, prefix, data: payload }) + } else { + let offset = + u64::try_from(buffer.len()).map_err(|_| "String buffer offset overflow")?; + buffer.extend_from_slice(bytes); + Ok(gpu_column::StringView { size, prefix, data: offset }) + } + } + + fn decode_string_view( + view: gpu_column::StringView, buffer: &[u8], + ) -> Result> { + let len = view.size as usize; + if len <= STRING_INLINE_TOTAL_BYTES { + let mut tmp = [0u8; STRING_INLINE_TOTAL_BYTES]; + for (i, out) in tmp.iter_mut().enumerate().take(STRING_INLINE_PREFIX_BYTES) { + *out = ((view.prefix >> (i * 8)) & 0xFF) as u8; + } + for i in 0..STRING_INLINE_DATA_BYTES { + tmp[STRING_INLINE_PREFIX_BYTES + i] = ((view.data >> (i * 8)) & 0xFF) as u8; + } + return Ok(std::str::from_utf8(&tmp[..len])?.to_string()); + } + + let offset = view.data as usize; + let end = offset.checked_add(len).ok_or("StringView offset overflow while decoding")?; + if end > buffer.len() { + return Err("StringView points outside string buffer".into()); + } + Ok(std::str::from_utf8(&buffer[offset..end])?.to_string()) + } + + #[test] + fn test_string_view_inline_roundtrip() { + let mut buf = Vec::new(); + let view = encode_string_view("hello", &mut buf).expect("encode"); + assert!(buf.is_empty()); + let out = decode_string_view(view, &buf).expect("decode"); + assert_eq!(out, "hello"); + } + + #[test] + fn test_string_view_external_roundtrip() { + let mut buf = Vec::new(); + let text = "this string is longer than twelve bytes"; + let view = encode_string_view(text, &mut buf).expect("encode"); + assert!(!buf.is_empty()); + let out = decode_string_view(view, &buf).expect("decode"); + assert_eq!(out, text); + } + + #[test] + fn test_string_view_inline_roundtrip_german_utf8() { + let mut buf = Vec::new(); + let text = "straße"; + assert!(text.len() <= STRING_INLINE_TOTAL_BYTES); + let view = encode_string_view(text, &mut buf).expect("encode"); + assert!(buf.is_empty()); + let out = decode_string_view(view, &buf).expect("decode"); + assert_eq!(out, text); + } + + #[test] + fn test_string_view_external_roundtrip_hindi_utf8() { + let mut buf = Vec::new(); + let text = "नमस्ते दुनिया"; + assert!(text.len() > STRING_INLINE_TOTAL_BYTES); + let view = encode_string_view(text, &mut buf).expect("encode"); + assert!(!buf.is_empty()); + let out = decode_string_view(view, &buf).expect("decode"); + assert_eq!(out, text); + } + + #[test] + fn test_string_view_roundtrip_mixed_languages() { + let mut buf = Vec::new(); + let texts = ["Grüße", "नमस्ते", "München में स्वागत"]; + + for text in texts { + let view = encode_string_view(text, &mut buf).expect("encode"); + let out = decode_string_view(view, &buf).expect("decode"); + assert_eq!(out, text); + } + } } diff --git a/tachyon/compute/src/data_type.rs b/tachyon/compute/src/data_type.rs index 0ef826d..5a0f697 100644 --- a/tachyon/compute/src/data_type.rs +++ b/tachyon/compute/src/data_type.rs @@ -4,6 +4,7 @@ * This source code is licensed under the Apache License, Version 2.0, * as found in the LICENSE file in the root directory of this source tree. */ +use gpu::column::StringView; use half::{bf16, f16}; #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -42,7 +43,7 @@ impl DataType { DataType::F16 => std::mem::size_of::(), DataType::F32 => std::mem::size_of::(), DataType::F64 => std::mem::size_of::(), - DataType::Str => std::mem::size_of::(), + DataType::Str => std::mem::size_of::(), } } @@ -62,7 +63,7 @@ impl DataType { DataType::F16 => "float16", DataType::F32 => "float", DataType::F64 => "double", - DataType::Str => "uint8_t", + DataType::Str => "StringView", } } diff --git a/tachyon/compute/src/evaluate.rs b/tachyon/compute/src/evaluate.rs index 1d291da..717f0b5 100644 --- a/tachyon/compute/src/evaluate.rs +++ b/tachyon/compute/src/evaluate.rs @@ -59,6 +59,12 @@ async fn evaluate_gpu( async fn evaluate_gpu_row( expr: &Expr, schema_context: &SchemaContext, columns: &[Column], ) -> Result>, Box> { + if let Expr::Column(name) = expr { + if let Some((idx, DataType::Str)) = schema_context.lookup(name).copied() { + return Ok(vec![columns[idx as usize].clone()]); + } + } + let mut code_block = CodeBlock::default(); expr.to_nvrtc::(schema_context, &mut code_block)?; @@ -69,11 +75,20 @@ async fn evaluate_gpu_row( let mut output_cols = Vec::::new(); let result_type = expr.infer_type(schema_context)?; - let gpu_col = gpu_column::Column::new_uninitialized::( - size * result_type.native_size(), - size.div_ceil(B::BITS), - size, - )?; + let gpu_col = if result_type == DataType::Str { + let row_capacity = estimate_string_row_capacity(expr, schema_context, columns)?; + gpu_column::Column::new_uninitialized_string::( + size, + row_capacity * size, + size.div_ceil(B::BITS), + )? + } else { + gpu_column::Column::new_uninitialized::( + size * result_type.native_size(), + size.div_ceil(B::BITS), + size, + )? + }; output_cols.push(gpu_col); gpu::launch::(code_block.code(), &input_cols, &output_cols).await?; @@ -88,6 +103,60 @@ async fn evaluate_gpu_row( Ok(result_cols) } +fn max_string_len_column(col: &Column) -> Result> { + let values = col + .data_as_slice::() + .ok_or("String expression requires encoded StringView columns")?; + Ok(values.iter().map(|sv| sv.size as usize).max().unwrap_or(0)) +} + +fn estimate_string_row_capacity( + expr: &Expr, schema_context: &SchemaContext, columns: &[Column], +) -> Result> { + match expr { + Expr::Column(name) => { + let (idx, dt) = schema_context + .lookup(name) + .copied() + .ok_or_else(|| format!("unknown column: {}", name))?; + if dt != DataType::Str { + return Err("Expected string column".into()); + } + Ok(max_string_len_column(&columns[idx as usize])?) + } + Expr::Call { name, args } => match name.as_str() { + "lower" | "lower_case" | "upper" | "upper_case" => { + if args.len() != 1 { + return Err(format!("{} expects 1 argument", name).into()); + } + estimate_string_row_capacity(&args[0], schema_context, columns) + } + "substring" => { + if args.len() != 3 { + return Err("substring expects 3 arguments".into()); + } + let base_cap = estimate_string_row_capacity(&args[0], schema_context, columns)?; + let requested = match &args[2] { + Expr::Literal(crate::expr::Literal::I32(v)) => (*v).max(0) as usize, + Expr::Literal(crate::expr::Literal::I64(v)) => (*v).max(0) as usize, + _ => base_cap, + }; + Ok(base_cap.min(requested)) + } + "concat" => { + if args.len() != 2 { + return Err("concat expects 2 arguments".into()); + } + let left = estimate_string_row_capacity(&args[0], schema_context, columns)?; + let right = estimate_string_row_capacity(&args[1], schema_context, columns)?; + Ok(left + right) + } + _ => Err(format!("Unsupported string function for output sizing: {}", name).into()), + }, + _ => Err("Unable to infer string output buffer size for this expression".into()), + } +} + async fn evaluate_gpu_aggregate( op: Operator, arg: &Expr, distinct: bool, schema_context: &SchemaContext, columns: &[Column], ) -> Result>, Box> { diff --git a/tachyon/compute/src/expr.rs b/tachyon/compute/src/expr.rs index 85eca0d..a4e832f 100644 --- a/tachyon/compute/src/expr.rs +++ b/tachyon/compute/src/expr.rs @@ -328,12 +328,71 @@ impl Expr { if args.len() != 1 { Err(TypeError::Unsupported("lower arity".into()))?; } + let t = args[0].infer_type(schema)?; + if t != DataType::Str { + Err(TypeError::Unsupported(format!("lower on {:?}", t)))?; + } + Ok(DataType::Str) + } + "lower_case" => { + if args.len() != 1 { + Err(TypeError::Unsupported("lower_case arity".into()))?; + } + let t = args[0].infer_type(schema)?; + if t != DataType::Str { + Err(TypeError::Unsupported(format!("lower_case on {:?}", t)))?; + } Ok(DataType::Str) } "upper" => { if args.len() != 1 { Err(TypeError::Unsupported("upper arity".into()))?; } + let t = args[0].infer_type(schema)?; + if t != DataType::Str { + Err(TypeError::Unsupported(format!("upper on {:?}", t)))?; + } + Ok(DataType::Str) + } + "upper_case" => { + if args.len() != 1 { + Err(TypeError::Unsupported("upper_case arity".into()))?; + } + let t = args[0].infer_type(schema)?; + if t != DataType::Str { + Err(TypeError::Unsupported(format!("upper_case on {:?}", t)))?; + } + Ok(DataType::Str) + } + "length" => { + if args.len() != 1 { + Err(TypeError::Unsupported("length arity".into()))?; + } + let t = args[0].infer_type(schema)?; + if t != DataType::Str { + Err(TypeError::Unsupported(format!("length on {:?}", t)))?; + } + Ok(DataType::U32) + } + "substring" => { + if args.len() != 3 { + Err(TypeError::Unsupported("substring arity".into()))?; + } + let t = args[0].infer_type(schema)?; + if t != DataType::Str { + Err(TypeError::Unsupported(format!("substring on {:?}", t)))?; + } + Ok(DataType::Str) + } + "concat" => { + if args.len() != 2 { + Err(TypeError::Unsupported("concat arity".into()))?; + } + let lt = args[0].infer_type(schema)?; + let rt = args[1].infer_type(schema)?; + if lt != DataType::Str || rt != DataType::Str { + Err(TypeError::Unsupported(format!("concat on {:?} and {:?}", lt, rt)))?; + } Ok(DataType::Str) } _ => Err(TypeError::Unsupported(format!("unknown function {}", name))), diff --git a/tachyon/compute/tests/data_type_tests.rs b/tachyon/compute/tests/data_type_tests.rs index a2c856d..0416d2c 100644 --- a/tachyon/compute/tests/data_type_tests.rs +++ b/tachyon/compute/tests/data_type_tests.rs @@ -42,7 +42,7 @@ fn test_c_type_mapping() { DataType::F16 => assert_eq!(ctype, "float16"), DataType::F32 => assert_eq!(ctype, "float"), DataType::F64 => assert_eq!(ctype, "double"), - DataType::Str => assert_eq!(ctype, "uint8_t"), + DataType::Str => assert_eq!(ctype, "StringView"), } } matrix_test!(check_c_type); diff --git a/tachyon/compute/tests/parser_tests.rs b/tachyon/compute/tests/parser_tests.rs index 6735c08..6b2e57d 100644 --- a/tachyon/compute/tests/parser_tests.rs +++ b/tachyon/compute/tests/parser_tests.rs @@ -87,7 +87,13 @@ test_parser_matrix!( test_parser_matrix!( Call, - [(test_parse_sqrt, "(sqrt, i0)", "sqrt"), (test_parse_upper, "(upper, i0)", "upper"),] + [ + (test_parse_sqrt, "(sqrt, i0)", "sqrt"), + (test_parse_upper, "(upper, i0)", "upper"), + (test_parse_length, "(length, s0)", "length"), + (test_parse_substring, "(substring, s0, 1, 3)", "substring"), + (test_parse_concat, "(concat, s0, s1)", "concat"), + ] ); macro_rules! test_parse_aggregate { @@ -141,3 +147,21 @@ fn test_cast() { let expr = parse_scheme_expr("(cast, i0, f64)").unwrap(); assert!(matches!(expr, Expr::Cast { .. })); } + +#[test] +fn test_parse_unicode_string_literal() { + let expr = parse_scheme_expr("(upper, \"नमस्ते Grüße\")").unwrap(); + match expr { + Expr::Call { name, args } => { + assert_eq!(name, "upper"); + assert_eq!(args.len(), 1); + match &args[0] { + Expr::Literal(compute::expr::Literal::Str(s)) => { + assert_eq!(s, "नमस्ते Grüße"); + } + other => panic!("Expected string literal argument, got {:?}", other), + } + } + other => panic!("Expected call expression, got {:?}", other), + } +} diff --git a/tachyon/gpu/src/column.rs b/tachyon/gpu/src/column.rs index 691a595..b8de1fe 100644 --- a/tachyon/gpu/src/column.rs +++ b/tachyon/gpu/src/column.rs @@ -5,4 +5,7 @@ * as found in the LICENSE file in the root directory of this source tree. */ -pub use crate::ffi::column::Column; +pub use crate::ffi::column::{ + Column, STRING_INLINE_DATA_BYTES, STRING_INLINE_PREFIX_BYTES, STRING_INLINE_TOTAL_BYTES, + StringView, +}; diff --git a/tachyon/gpu/src/cuda_launcher.rs b/tachyon/gpu/src/cuda_launcher.rs index e0eeabc..eb94526 100644 --- a/tachyon/gpu/src/cuda_launcher.rs +++ b/tachyon/gpu/src/cuda_launcher.rs @@ -28,6 +28,7 @@ const EVAL_KERNEL_HEADERS_FINGERPRINT: &str = concat!( include_str!("ffi/kernels/math.cuh"), include_str!("ffi/kernels/limits.cuh"), include_str!("ffi/kernels/utils.cuh"), + include_str!("ffi/kernels/string_ops.cuh"), include_str!("ffi/kernels/bitVector.cuh"), include_str!("ffi/kernels/error.h"), ); @@ -52,6 +53,7 @@ fn compose_kernel_source(kernel_name: &str, code: &str) -> String { #include "column.cuh" #include "context.cuh" #include "math.cuh" + #include "string_ops.cuh" extern "C" __global__ void {kernel_name}(Context* ctx, Column* input, Column* output, size_t num_rows) {{ size_t row_idx = blockIdx.x * blockDim.x + threadIdx.x; if (row_idx >= num_rows) return; diff --git a/tachyon/gpu/src/ffi/column.rs b/tachyon/gpu/src/ffi/column.rs index 8d84c4a..9d4ca01 100644 --- a/tachyon/gpu/src/ffi/column.rs +++ b/tachyon/gpu/src/ffi/column.rs @@ -8,9 +8,29 @@ use std::error::Error; use crate::ffi::memory::gpu_memory::{GpuMemory, MemoryType}; + +/// Compact string descriptor stored in the main column stream. +/// +/// Memory layout (16 bytes total): +/// - 4 bytes: string byte length +/// - 4 bytes: prefix (first 4 bytes) +/// - 8 bytes: inline continuation bytes or offset into string buffer stream +pub const STRING_INLINE_PREFIX_BYTES: usize = 4; +pub const STRING_INLINE_DATA_BYTES: usize = 8; +pub const STRING_INLINE_TOTAL_BYTES: usize = STRING_INLINE_PREFIX_BYTES + STRING_INLINE_DATA_BYTES; + +#[repr(C)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub struct StringView { + pub size: u32, + pub prefix: u32, + pub data: u64, +} + pub struct Column { data_memory: GpuMemory, validity_memory: Option, + string_buffer_memory: Option, pub num_rows: usize, } @@ -34,7 +54,42 @@ impl Column { None }; - Ok(Column { data_memory, validity_memory, num_rows: data.len() }) + Ok(Column { + data_memory, + validity_memory, + string_buffer_memory: None, + num_rows: data.len(), + }) + } + + pub fn new_string( + views: &[StringView], string_buffer: &[u8], null_bits: Option<&[B]>, + ) -> Result> + where + B: Sized, + { + let memory_type = MemoryType::Device; + let data_memory = memory_type + .allocate_from_slice(views) + .map_err(|e| format!("Failed to allocate device memory for string views: {}", e))?; + let string_buffer_memory = if string_buffer.is_empty() { + None + } else { + Some(memory_type.allocate_from_slice(string_buffer).map_err(|e| { + format!("Failed to allocate device memory for string buffer: {}", e) + })?) + }; + + let validity_memory = if let Some(null_bits) = null_bits { + let device_bitmap = memory_type.allocate_from_slice(null_bits).map_err(|e| { + format!("Failed to allocate device memory for validity bitmap: {}", e) + })?; + Some(device_bitmap) + } else { + None + }; + + Ok(Column { data_memory, validity_memory, string_buffer_memory, num_rows: views.len() }) } pub fn new_uninitialized( @@ -57,7 +112,37 @@ impl Column { None }; - Ok(Column { data_memory, validity_memory, num_rows }) + Ok(Column { data_memory, validity_memory, string_buffer_memory: None, num_rows }) + } + + pub fn new_uninitialized_string( + num_rows: usize, string_buffer_size: usize, null_bits_len: usize, + ) -> Result> { + assert!(num_rows > 0, "Cannot allocate zero-row string column."); + let memory_type = MemoryType::Device; + let data_memory = memory_type + .allocate(num_rows * std::mem::size_of::()) + .map_err(|e| format!("Failed to allocate device memory for string views: {}", e))?; + + let string_buffer_memory = if string_buffer_size > 0 { + Some(memory_type.allocate(string_buffer_size).map_err(|e| { + format!("Failed to allocate device memory for string buffer: {}", e) + })?) + } else { + None + }; + + let validity_memory = if null_bits_len > 0 { + let validity_memory = + memory_type.allocate(null_bits_len * std::mem::size_of::()).map_err(|e| { + format!("Failed to allocate device memory for validity bitmap: {}", e) + })?; + Some(validity_memory) + } else { + None + }; + + Ok(Column { data_memory, validity_memory, string_buffer_memory, num_rows }) } pub fn len(&self) -> usize { @@ -73,11 +158,16 @@ impl Column { let validity_ptr = self.validity_memory.as_ref().map_or(std::ptr::null(), |vm| vm.device_ptr()); + let string_buffer_ptr = + self.string_buffer_memory.as_ref().map_or(std::ptr::null_mut(), |vm| vm.device_ptr()); + let string_buffer_size = self.string_buffer_memory.as_ref().map_or(0, |vm| vm.len()); ColumnFFI { data: data_ptr as *const std::os::raw::c_void, null_bits: validity_ptr as *const B, - size: self.data_memory.len(), + size: self.num_rows, + string_buffer: string_buffer_ptr as *const u8, + string_buffer_size, } } @@ -96,6 +186,16 @@ impl Column { }) .transpose() } + + pub fn host_string_buffer(&self) -> Result>, Box> { + self.string_buffer_memory + .as_ref() + .map(|vm| { + vm.to_vec::() + .map_err(|e| format!("Failed to copy string buffer from device: {}", e).into()) + }) + .transpose() + } } #[repr(C)] @@ -104,4 +204,6 @@ pub(crate) struct ColumnFFI { pub data: *const std::os::raw::c_void, pub null_bits: *const B, pub size: usize, + pub string_buffer: *const u8, + pub string_buffer_size: usize, } diff --git a/tachyon/gpu/src/ffi/kernels/column.cuh b/tachyon/gpu/src/ffi/kernels/column.cuh index 465b505..9b84092 100644 --- a/tachyon/gpu/src/ffi/kernels/column.cuh +++ b/tachyon/gpu/src/ffi/kernels/column.cuh @@ -14,9 +14,14 @@ struct Column { void *data; void *null_bits; size_t size; + void *string_buffer; + size_t string_buffer_size; - __host__ __device__ Column(void *data, size_t size, void *null_bits = nullptr) - : data(data), null_bits(null_bits), size(size) { + __host__ __device__ Column(void *data, size_t size, void *null_bits = nullptr, + void *string_buffer = nullptr, + size_t string_buffer_size = 0) + : data(data), null_bits(null_bits), size(size), + string_buffer(string_buffer), string_buffer_size(string_buffer_size) { ASSERT(data != nullptr, "Column data pointer must not be null"); ASSERT(size > 0, "Column size must be greater than zero"); } @@ -49,8 +54,12 @@ struct Column { load_value.valid = false; return load_value; } - using NativeType = typename TypeTraits::NativeType; - load_value.value = reinterpret_cast(data)[idx]; + if constexpr (K == TypeKind::String) { + load_value.value = reinterpret_cast(data)[idx]; + } else { + using NativeType = typename TypeTraits::NativeType; + load_value.value = reinterpret_cast(data)[idx]; + } return load_value; } @@ -60,8 +69,12 @@ struct Column { ASSERT(idx < size, "store(): index out of range"); set_valid(idx, store_value.valid); if (store_value.valid) { - using NativeType = typename TypeTraits::NativeType; - reinterpret_cast(data)[idx] = store_value.value; + if constexpr (K == TypeKind::String) { + reinterpret_cast(data)[idx] = store_value.value; + } else { + using NativeType = typename TypeTraits::NativeType; + reinterpret_cast(data)[idx] = store_value.value; + } } } }; diff --git a/tachyon/gpu/src/ffi/kernels/string_ops.cuh b/tachyon/gpu/src/ffi/kernels/string_ops.cuh new file mode 100644 index 0000000..b15e755 --- /dev/null +++ b/tachyon/gpu/src/ffi/kernels/string_ops.cuh @@ -0,0 +1,433 @@ +/* + * Copyright (c) NeoCraft Technologies. + * + * This source code is licensed under the Apache License, Version 2.0, + * as found in the LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include "column.cuh" +#include "types.cuh" + +namespace string_ops { + +__device__ __forceinline__ uint32_t pack_prefix(const char *data, + uint32_t size) { + uint32_t prefix = 0; + const uint32_t take = + size < STRING_INLINE_PREFIX_BYTES ? size : STRING_INLINE_PREFIX_BYTES; + for (uint32_t i = 0; i < take; i++) { + prefix |= (static_cast(static_cast(data[i])) << (i * 8)); + } + return prefix; +} + +__device__ __forceinline__ const char * +view_data_ptr(const StringView &sv, const Column &col, char *inline_buf) { + if (sv.size <= STRING_INLINE_TOTAL_BYTES) { + for (uint32_t i = 0; i < STRING_INLINE_PREFIX_BYTES; i++) { + inline_buf[i] = static_cast((sv.prefix >> (i * 8)) & 0xFF); + } + for (uint32_t i = 0; i < STRING_INLINE_DATA_BYTES; i++) { + inline_buf[STRING_INLINE_PREFIX_BYTES + i] = + static_cast((sv.data >> (i * 8)) & 0xFF); + } + return inline_buf; + } + ASSERT(col.string_buffer != nullptr, + "Missing string buffer for external string"); + return reinterpret_cast(col.string_buffer) + + static_cast(sv.data); +} + +__device__ __forceinline__ bool write_string_view(const char *src, uint32_t len, + Column &out_col, + size_t row_idx, + StringView &out_view) { + out_view.size = len; + out_view.prefix = pack_prefix(src, len); + + if (len <= STRING_INLINE_TOTAL_BYTES) { + uint64_t payload = 0; + for (uint32_t i = STRING_INLINE_PREFIX_BYTES; i < len; i++) { + payload |= (static_cast(static_cast(src[i])) + << ((i - STRING_INLINE_PREFIX_BYTES) * 8)); + } + out_view.data = payload; + return true; + } + + if (out_col.string_buffer == nullptr || out_col.string_buffer_size == 0 || + out_col.size == 0) { + return false; + } + + size_t stride = out_col.string_buffer_size / out_col.size; + if (stride < len) { + return false; + } + + const size_t offset = row_idx * stride; + char *dst = reinterpret_cast(out_col.string_buffer) + offset; + for (uint32_t i = 0; i < len; i++) { + dst[i] = src[i]; + } + out_view.data = static_cast(offset); + return true; +} + +__device__ __forceinline__ uint32_t utf8_sequence_len(uint8_t lead) { + if ((lead & 0x80u) == 0) + return 1; + if ((lead & 0xE0u) == 0xC0u) + return 2; + if ((lead & 0xF0u) == 0xE0u) + return 3; + if ((lead & 0xF8u) == 0xF0u) + return 4; + return 1; +} + +__device__ __forceinline__ bool utf8_is_cont(uint8_t b) { + return (b & 0xC0u) == 0x80u; +} + +__device__ __forceinline__ void utf8_decode_next(const char *src, uint32_t len, + uint32_t offset, + uint32_t &codepoint, + uint32_t &next_offset) { + if (offset >= len) { + codepoint = 0; + next_offset = offset; + return; + } + + const uint8_t b0 = static_cast(src[offset]); + const uint32_t n = utf8_sequence_len(b0); + + if (n == 1 || offset + n > len) { + codepoint = b0; + next_offset = offset + 1; + return; + } + + if (n == 2) { + const uint8_t b1 = static_cast(src[offset + 1]); + if (!utf8_is_cont(b1)) { + codepoint = b0; + next_offset = offset + 1; + return; + } + codepoint = ((b0 & 0x1Fu) << 6) | (b1 & 0x3Fu); + next_offset = offset + 2; + return; + } + + if (n == 3) { + const uint8_t b1 = static_cast(src[offset + 1]); + const uint8_t b2 = static_cast(src[offset + 2]); + if (!utf8_is_cont(b1) || !utf8_is_cont(b2)) { + codepoint = b0; + next_offset = offset + 1; + return; + } + codepoint = ((b0 & 0x0Fu) << 12) | ((b1 & 0x3Fu) << 6) | (b2 & 0x3Fu); + next_offset = offset + 3; + return; + } + + const uint8_t b1 = static_cast(src[offset + 1]); + const uint8_t b2 = static_cast(src[offset + 2]); + const uint8_t b3 = static_cast(src[offset + 3]); + if (!utf8_is_cont(b1) || !utf8_is_cont(b2) || !utf8_is_cont(b3)) { + codepoint = b0; + next_offset = offset + 1; + return; + } + codepoint = ((b0 & 0x07u) << 18) | ((b1 & 0x3Fu) << 12) | + ((b2 & 0x3Fu) << 6) | (b3 & 0x3Fu); + next_offset = offset + 4; +} + +__device__ __forceinline__ uint32_t utf8_encode(uint32_t cp, char *dst) { + if (cp <= 0x7F) { + dst[0] = static_cast(cp); + return 1; + } + if (cp <= 0x7FF) { + dst[0] = static_cast(0xC0u | ((cp >> 6) & 0x1Fu)); + dst[1] = static_cast(0x80u | (cp & 0x3Fu)); + return 2; + } + if (cp <= 0xFFFF) { + dst[0] = static_cast(0xE0u | ((cp >> 12) & 0x0Fu)); + dst[1] = static_cast(0x80u | ((cp >> 6) & 0x3Fu)); + dst[2] = static_cast(0x80u | (cp & 0x3Fu)); + return 3; + } + + if (cp > 0x10FFFF) { + cp = static_cast('?'); + } + dst[0] = static_cast(0xF0u | ((cp >> 18) & 0x07u)); + dst[1] = static_cast(0x80u | ((cp >> 12) & 0x3Fu)); + dst[2] = static_cast(0x80u | ((cp >> 6) & 0x3Fu)); + dst[3] = static_cast(0x80u | (cp & 0x3Fu)); + return 4; +} + +__device__ __forceinline__ uint32_t unicode_to_lower(uint32_t cp) { + if (cp >= 'A' && cp <= 'Z') + return cp + 32; + if ((cp >= 0x00C0 && cp <= 0x00D6) || (cp >= 0x00D8 && cp <= 0x00DE)) + return cp + 32; + if (cp >= 0x0391 && cp <= 0x03A1) + return cp + 32; + if (cp >= 0x03A3 && cp <= 0x03AB) + return cp + 32; + if (cp >= 0x0410 && cp <= 0x042F) + return cp + 32; + if (cp == 0x0401) + return 0x0451; + return cp; +} + +__device__ __forceinline__ uint32_t unicode_to_upper(uint32_t cp) { + if (cp >= 'a' && cp <= 'z') + return cp - 32; + if ((cp >= 0x00E0 && cp <= 0x00F6) || (cp >= 0x00F8 && cp <= 0x00FE)) + return cp - 32; + if (cp >= 0x03B1 && cp <= 0x03C1) + return cp - 32; + if (cp >= 0x03C3 && cp <= 0x03CB) + return cp - 32; + if (cp == 0x03C2) + return 0x03A3; + if (cp >= 0x0430 && cp <= 0x044F) + return cp - 32; + if (cp == 0x0451) + return 0x0401; + return cp; +} + +__device__ __forceinline__ uint32_t utf8_codepoint_count(const char *src, + uint32_t len) { + uint32_t count = 0; + uint32_t i = 0; + while (i < len) { + uint32_t cp = 0; + uint32_t next = i; + utf8_decode_next(src, len, i, cp, next); + i = next; + count++; + } + return count; +} + +__device__ __forceinline__ uint32_t +utf8_byte_offset_for_char(const char *src, uint32_t len, uint32_t char_idx) { + uint32_t i = 0; + uint32_t c = 0; + while (i < len && c < char_idx) { + uint32_t cp = 0; + uint32_t next = i; + utf8_decode_next(src, len, i, cp, next); + i = next; + c++; + } + return i; +} + +__device__ __forceinline__ UInt32 length(const String &s, + const Column &input_col) { + UInt32 out; + out.valid = s.valid; + if (out.valid) { + char inline_buf[STRING_INLINE_TOTAL_BYTES]; + const StringView &sv = s.value; + const char *src = view_data_ptr(sv, input_col, inline_buf); + out.value = utf8_codepoint_count(src, sv.size); + } + return out; +} + +__device__ __forceinline__ bool +transform_case_utf8(const String &s, const Column &input_col, Column &out_col, + size_t row_idx, bool to_upper, StringView &out_view) { + char inline_buf[STRING_INLINE_TOTAL_BYTES]; + const char *src = view_data_ptr(s.value, input_col, inline_buf); + const uint32_t in_len = s.value.size; + + char inline_dst[STRING_INLINE_TOTAL_BYTES]; + char *dst = inline_dst; + size_t stride = 0; + if (in_len > STRING_INLINE_TOTAL_BYTES) { + if (out_col.string_buffer == nullptr || out_col.size == 0) { + return false; + } + stride = out_col.string_buffer_size / out_col.size; + if (stride < in_len) { + return false; + } + dst = reinterpret_cast(out_col.string_buffer) + row_idx * stride; + } + + uint32_t in_off = 0; + uint32_t out_off = 0; + while (in_off < in_len) { + uint32_t cp = 0; + uint32_t next = in_off; + utf8_decode_next(src, in_len, in_off, cp, next); + cp = to_upper ? unicode_to_upper(cp) : unicode_to_lower(cp); + char enc[4]; + uint32_t enc_len = utf8_encode(cp, enc); + if (out_off + enc_len > in_len) { + return false; + } + for (uint32_t i = 0; i < enc_len; i++) { + dst[out_off + i] = enc[i]; + } + out_off += enc_len; + in_off = next; + } + + return write_string_view(dst, out_off, out_col, row_idx, out_view); +} + +__device__ __forceinline__ String lower(const String &s, + const Column &input_col, + Column &out_col, size_t row_idx) { + String out; + out.valid = s.valid; + if (!out.valid) { + return out; + } + out.valid = + transform_case_utf8(s, input_col, out_col, row_idx, false, out.value); + return out; +} + +__device__ __forceinline__ String upper(const String &s, + const Column &input_col, + Column &out_col, size_t row_idx) { + String out; + out.valid = s.valid; + if (!out.valid) { + return out; + } + out.valid = + transform_case_utf8(s, input_col, out_col, row_idx, true, out.value); + return out; +} + +__device__ __forceinline__ String substring(const String &s, int32_t start, + int32_t len, + const Column &input_col, + Column &out_col, size_t row_idx) { + String out; + out.valid = s.valid; + if (!out.valid) { + return out; + } + + char inline_buf[STRING_INLINE_TOTAL_BYTES]; + const char *src = view_data_ptr(s.value, input_col, inline_buf); + const uint32_t byte_len = s.value.size; + const uint32_t char_count = utf8_codepoint_count(src, byte_len); + + int32_t sidx = start < 0 ? 0 : start; + if (sidx > static_cast(char_count)) { + sidx = static_cast(char_count); + } + int32_t take = len < 0 ? 0 : len; + if (sidx + take > static_cast(char_count)) { + take = static_cast(char_count) - sidx; + } + + const uint32_t start_byte = + utf8_byte_offset_for_char(src, byte_len, static_cast(sidx)); + const uint32_t end_byte = utf8_byte_offset_for_char( + src, byte_len, static_cast(sidx + take)); + const uint32_t out_len = end_byte - start_byte; + + if (out_len <= STRING_INLINE_TOTAL_BYTES) { + char local_buf[STRING_INLINE_TOTAL_BYTES]; + for (uint32_t i = 0; i < out_len; i++) { + local_buf[i] = src[start_byte + i]; + } + out.valid = + write_string_view(local_buf, out_len, out_col, row_idx, out.value); + return out; + } + + if (out_col.string_buffer == nullptr || out_col.size == 0) { + out.valid = false; + return out; + } + size_t stride = out_col.string_buffer_size / out_col.size; + if (stride < out_len) { + out.valid = false; + return out; + } + + char *row_dst = + reinterpret_cast(out_col.string_buffer) + row_idx * stride; + for (uint32_t i = 0; i < out_len; i++) { + row_dst[i] = src[start_byte + i]; + } + out.valid = write_string_view(row_dst, out_len, out_col, row_idx, out.value); + return out; +} + +__device__ __forceinline__ String concat(const String &lhs, + const Column &lhs_col, + const String &rhs, + const Column &rhs_col, Column &out_col, + size_t row_idx) { + String out; + out.valid = lhs.valid & rhs.valid; + if (!out.valid) { + return out; + } + + char lhs_inline[STRING_INLINE_TOTAL_BYTES]; + char rhs_inline[STRING_INLINE_TOTAL_BYTES]; + const char *lptr = view_data_ptr(lhs.value, lhs_col, lhs_inline); + const char *rptr = view_data_ptr(rhs.value, rhs_col, rhs_inline); + const uint32_t lsize = lhs.value.size; + const uint32_t rsize = rhs.value.size; + const uint32_t out_len = lsize + rsize; + + if (out_len <= STRING_INLINE_TOTAL_BYTES) { + char local_buf[STRING_INLINE_TOTAL_BYTES]; + for (uint32_t i = 0; i < lsize; i++) { + local_buf[i] = lptr[i]; + } + for (uint32_t i = 0; i < rsize; i++) { + local_buf[lsize + i] = rptr[i]; + } + out.valid = + write_string_view(local_buf, out_len, out_col, row_idx, out.value); + return out; + } + + size_t stride = + out_col.size == 0 ? 0 : out_col.string_buffer_size / out_col.size; + if (stride < out_len || out_col.string_buffer == nullptr) { + out.valid = false; + return out; + } + char *row_dst = + reinterpret_cast(out_col.string_buffer) + row_idx * stride; + for (uint32_t i = 0; i < lsize; i++) { + row_dst[i] = lptr[i]; + } + for (uint32_t i = 0; i < rsize; i++) { + row_dst[lsize + i] = rptr[i]; + } + out.valid = write_string_view(row_dst, out_len, out_col, row_idx, out.value); + return out; +} + +} // namespace string_ops diff --git a/tachyon/gpu/src/ffi/kernels/types.cuh b/tachyon/gpu/src/ffi/kernels/types.cuh index b1f8c6a..e3fc9f7 100644 --- a/tachyon/gpu/src/ffi/kernels/types.cuh +++ b/tachyon/gpu/src/ffi/kernels/types.cuh @@ -101,6 +101,53 @@ DEFINE_TYPE(Float64, double, sizeof(double), true, true, #undef DEFINE_TYPE +struct StringView { + uint32_t size = 0; + uint32_t prefix = 0; + uint64_t data = 0; +}; + +static constexpr uint32_t STRING_INLINE_PREFIX_BYTES = 4; +static constexpr uint32_t STRING_INLINE_DATA_BYTES = 8; +static constexpr uint32_t STRING_INLINE_TOTAL_BYTES = + STRING_INLINE_PREFIX_BYTES + STRING_INLINE_DATA_BYTES; + +struct String { + using NativeType = StringView; + NativeType value{}; + bool valid = true; + + __host__ __device__ String() = default; + __host__ __device__ String(NativeType v) : value(v) {} + + static constexpr TypeKind kind = TypeKind::String; + static constexpr uint8_t size = sizeof(StringView); + static constexpr bool is_signed = false; + static constexpr bool is_floating = false; + static constexpr bool is_integral = false; + __host__ __device__ static constexpr NativeType min() { return NativeType{}; } + __host__ __device__ static constexpr NativeType max() { return NativeType{}; } + __host__ __device__ static constexpr NativeType zero() { + return NativeType{}; + } + __host__ __device__ operator NativeType() const { return value; } +}; + +template <> struct TypeTraits { + using WrapperType = String; + using NativeType = StringView; + static constexpr TypeKind kind = TypeKind::String; + static constexpr uint8_t size = sizeof(StringView); + static constexpr bool is_signed = false; + static constexpr bool is_floating = false; + static constexpr unsigned int size_bytes = sizeof(StringView); + __host__ __device__ static constexpr NativeType min() { return NativeType{}; } + __host__ __device__ static constexpr NativeType max() { return NativeType{}; } + __host__ __device__ static constexpr NativeType zero() { + return NativeType{}; + } +}; + template struct KindToWrapper; #define DEFINE_KIND_MAPPING(ENUM_VAL) \ template <> struct KindToWrapper { \ @@ -120,6 +167,7 @@ DEFINE_KIND_MAPPING(BFloat16) DEFINE_KIND_MAPPING(Float16) DEFINE_KIND_MAPPING(Float32) DEFINE_KIND_MAPPING(Float64) +DEFINE_KIND_MAPPING(String) #undef DEFINE_KIND_MAPPING @@ -158,6 +206,7 @@ __constant__ const TypeDescriptor TYPE_DESCRIPTORS[] = { TypeDescriptor::from_type(), TypeDescriptor::from_type(), TypeDescriptor::from_type(), + TypeDescriptor::from_type(), }; __host__ __device__ inline const TypeDescriptor &