From af4d38ca3c378501c123743a40db75883edfe205 Mon Sep 17 00:00:00 2001 From: Tomokatsu Iguchi Date: Mon, 9 Jan 2023 14:01:59 +0900 Subject: [PATCH] [WIP] parallel (but no test success for multibytes chars) --- benches/replace.rs | 33 +++++- src/main.rs | 13 +- src/replace_dollar_braces.rs | 222 +++++++++++++++++++++++++++++++++-- 3 files changed, 258 insertions(+), 10 deletions(-) diff --git a/benches/replace.rs b/benches/replace.rs index 9b28be2..4867579 100644 --- a/benches/replace.rs +++ b/benches/replace.rs @@ -1,9 +1,10 @@ use criterion::{criterion_group, criterion_main, Criterion}; use envbed::model::EnvVar; -use envbed::replace_dollar_braces::{replace_dollar_braces, replace_dollar_braces_with_hashmap}; +use envbed::replace_dollar_braces::{replace_dollar_braces, replace_dollar_braces_with_hashmap, replace_dollar_braces_with_hashmap_parallel}; use envbed::replace_double_braces::{replace_double_braces, replace_double_braces_with_hashmap}; use rustc_hash::FxHasher; +use std::sync::{Arc, Mutex}; use std::{collections::HashMap, hash::BuildHasherDefault}; fn bench_dollar_vec(c: &mut Criterion) { @@ -95,6 +96,35 @@ fn bench_dollar_hash(c: &mut Criterion) { }); } +fn bench_dollar_hash_parallel(c: &mut Criterion) { + let mut envvars: HashMap> = HashMap::default(); + envvars.insert("KEY1".to_owned(), "value1".to_owned()); + envvars.insert("KEY2".to_owned(), "value2".to_owned()); + envvars.insert("KEY3".to_owned(), "value3".to_owned()); + envvars.insert("KEY4".to_owned(), "value4".to_owned()); + envvars.insert("KEY5".to_owned(), "value5".to_owned()); + envvars.insert("KEY6".to_owned(), "value6".to_owned()); + envvars.insert("KEY7".to_owned(), "value7".to_owned()); + envvars.insert("KEY8".to_owned(), "value8".to_owned()); + envvars.insert("KEY9".to_owned(), "value9".to_owned()); + envvars.insert("KEY10".to_owned(), "value10".to_owned()); + let target = + "2fwa${KEY1}hfasd${KEY1}fnadnfa0${KEY1}2fwah${KEY1}fasdf${KEY1}na${KEY1}${KEY1}dnfa0 + 2fwa${KEY2}hfasd${KEY2}fnadnfa0${KEY2}2fwah${KEY2}fasdf${KEY2}na${KEY2}${KEY2}dnfa0 + 2fwa${KEY3}hfasd${KEY3}fnadnfa0${KEY3}2fwah${KEY3}fasdf${KEY3}na${KEY3}${KEY3}dnfa0 + 2fwa${KEY4}hfasd${KEY4}fnadnfa0${KEY4}2fwah${KEY4}fasdf${KEY4}na${KEY4}${KEY4}dnfa0 + 2fwa${KEY5}hfasd${KEY5}fnadnfa0${KEY5}2fwah${KEY5}fasdf${KEY5}na${KEY5}${KEY5}dnfa0 + 2fwa${KEY6}hfasd${KEY6}fnadnfa0${KEY6}2fwah${KEY6}fasdf${KEY6}na${KEY6}${KEY6}dnfa0 + 2fwa${KEY7}hfasd${KEY7}fnadnfa0${KEY7}2fwah${KEY7}fasdf${KEY7}na${KEY7}${KEY7}dnfa0 + 2fwa${KEY8}hfasd${KEY8}fnadnfa0${KEY8}2fwah${KEY8}fasdf${KEY8}na${KEY8}${KEY8}dnfa0 + 2fwa${KEY9}hfasd${KEY9}fnadnfa0${KEY9}2fwah${KEY9}fasdf${KEY9}na${KEY9}${KEY9}dnfa0 + 2fwah${KEY10}fasd${KEY10}fnadnfa0$${KEY10}2fwa${KEY10}hfasd${KEY10}fn${KEY10}${KEY10}adnfa"; + + c.bench_function("replace_dollar_braces_with_hashmap_parallel", |b| { + b.iter(|| replace_dollar_braces_with_hashmap_parallel(&envvars, target.to_owned(), 24)) + }); +} + fn bench_double_vec(c: &mut Criterion) { let envvars: Vec = vec![ EnvVar { @@ -188,6 +218,7 @@ criterion_group!( benches, bench_dollar_vec, bench_dollar_hash, + bench_dollar_hash_parallel, bench_double_vec, bench_double_hash ); diff --git a/src/main.rs b/src/main.rs index 08e453d..ba01d8b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,9 @@ use atty::Stream; use clap::Parser; -use envbed::replace_dollar_braces::replace_dollar_braces_with_hashmap; +use envbed::replace_dollar_braces::{replace_dollar_braces_with_hashmap, replace_dollar_braces_with_hashmap_parallel}; use envbed::replace_double_braces::replace_double_braces_with_hashmap; use std::io::{self}; +use std::sync::{Arc, Mutex}; use std::{ env, io::{Read, Write}, @@ -32,6 +33,10 @@ struct Args { #[arg(long, default_value_t = false)] template_syntax_double_braces: bool, + /// parallel replacement (consider overhead of parallelism. this is for a huge file) + #[arg(long, default_value_t = false)] + parallel: bool, + /// override a target file (--file) #[arg(short = 'w', long, default_value_t = false)] override_file: bool, @@ -88,7 +93,11 @@ fn main() -> std::io::Result<()> { if args.template_syntax_double_braces { target = replace_double_braces_with_hashmap(&envvars, &target) } else { - target = replace_dollar_braces_with_hashmap(&envvars, &target) + if args.parallel { + target = replace_dollar_braces_with_hashmap_parallel(&envvars, target.to_owned(), 24) + } else { + target = replace_dollar_braces_with_hashmap(&envvars, &target) + } } // 3. Output diff --git a/src/replace_dollar_braces.rs b/src/replace_dollar_braces.rs index 1091e1e..06878fa 100644 --- a/src/replace_dollar_braces.rs +++ b/src/replace_dollar_braces.rs @@ -1,6 +1,7 @@ use crate::model::EnvVar; use rustc_hash::FxHasher; -use std::{collections::HashMap, hash::BuildHasherDefault}; +use std::{collections::HashMap, hash::BuildHasherDefault, sync::{mpsc, Mutex, Arc}, ops::Range}; +use std::thread; #[allow(dead_code)] pub fn replace_dollar_braces(envvars: &Vec, mut target: String) -> String { @@ -54,14 +55,181 @@ pub fn replace_dollar_braces_with_hashmap( result } + +#[allow(dead_code)] +pub fn replace_dollar_braces_with_hashmap_parallel( + envvars: &HashMap>, + target: String, + thread_n: usize, +) -> String { + replace_parallel(target, envvars, thread_n) +} + + +pub struct ChannelValue { + pub a: usize, + pub partial_match_idx: Option, + pub text: String, + pub replacement: Vec<(Range, String)>, +} + + +fn replace_parallel(target: String, envvars: &HashMap>, thread_n: usize) -> String { + // 非同期チャネルを生成 + let (tx, rx) = mpsc::channel(); + + // start + let mut result = String::new(); + + let char_count = target.chars().count(); + let start_end_range_count = + char_count / thread_n + if char_count % thread_n != 0 { 1 } else { 0 }; + + println!("parallel"); + for i in 0..(thread_n) { + let tx = tx.clone(); + let envvars = envvars.clone(); + + // variables + let start_i = start_end_range_count * i; + let end_i = if i == (thread_n - 1) { + char_count + } else { + start_end_range_count * (i + 1) + }; + let target_partial = unsafe { target.get_unchecked(start_i..end_i) }.to_owned(); + + thread::spawn(move || { + + // let mut tmp = String::new(); + let mut tmp: Vec<(Range, String)> = vec![]; + let mut env_key_start = 0; + + let mut f_match_level: usize = 0; // 0,1=$,2=${,3=${} + let mut partial_match_idx = None; + let mut idx = 0; + let mut env_key = String::new(); + for (mut j, c) in target_partial.char_indices().into_iter() { + j += start_i; + if end_i <= j { + break; + } + (f_match_level, env_key_start) = replace(j, c, f_match_level, env_key_start); + if f_match_level == 1 { + idx = j; + } else if f_match_level == 4 { + if let Some(val) = envvars.get(&env_key) { + tmp.push((env_key_start-2..j+1, val.to_owned())); + partial_match_idx = None; + } + env_key_start = 0; + f_match_level = 0; + } else if f_match_level == 3 { + env_key.push(c); + } else if f_match_level == 2 { + env_key = "".to_owned(); + } + if f_match_level > 0 && f_match_level < 4 { + partial_match_idx = Some(idx); + } + } + // tmp.push_str(unsafe { target.get_unchecked(last_end..end_i) }); + + tx.send(ChannelValue { + a: i, + partial_match_idx, + text: "".to_owned(), + replacement: tmp, + }) + .unwrap(); // (非同期)送信 + }); + } + // end + let mut hmap: HashMap> = HashMap::default(); + + // receive channel + println!("receive channel"); + for i in rx.iter().take(thread_n) { + hmap.insert(i.a, i); + } + // partial match resolve + println!("partial match resolve"); + let target_str = target.as_str(); + for i in 0..(thread_n) { + if let Some(a) = hmap.get_mut(&i) { + if let Some(partial_match_idx) = a.partial_match_idx { + let mut f_match_level = 0; + let mut env_key_start = 0; + + // until matched index + let mut j = partial_match_idx; + loop { + let b = unsafe { target_str.get_unchecked(j..j+1) }; + // FIXME: + (f_match_level, env_key_start) = replace(j, b.chars().next().unwrap(), f_match_level, env_key_start); + if f_match_level == 4 { + if let Some(val) = envvars.get(unsafe { target_str.get_unchecked(env_key_start..j) }) { + a.replacement.push((env_key_start-2..j+1, val.to_owned())); + } + break; + } else if f_match_level == 0 { + break; + } + j += 1 + } + } + } + } + // concat + println!("concat"); + let mut start_i = 0; + for i in 0..(thread_n) { + if let Some(a) = hmap.get(&i) { + for (rng, val) in &a.replacement { + result.push_str(unsafe { target_str.get_unchecked(start_i..rng.start) }); + result.push_str(val); + start_i = rng.end; + } + } + } + result.push_str(unsafe { target_str.get_unchecked(start_i..char_count) }); + println!("done!"); + result +} + +fn replace(j: usize, c: char, f_match_level: usize, env_key_start: usize) -> (usize, usize) { + // search + if c == '$' { + return (1, 0) + } else if c == '{' { + if f_match_level == 1 { + return (2, 0) + } else { + return (0, 0) + } + } else if c == '_' || c.is_alphanumeric() { + if f_match_level == 2 { + return (3, j) + } else if f_match_level == 3 { + return (3, env_key_start) + } else { + return (0, 0) + } + } else if c == '}' && f_match_level == 3 { + return (4, env_key_start) + } else { + return (0, 0) + } +} + #[cfg(test)] mod tests { use crate::{ model::EnvVar, - replace_dollar_braces::{replace_dollar_braces, replace_dollar_braces_with_hashmap}, + replace_dollar_braces::{replace_dollar_braces, replace_dollar_braces_with_hashmap, replace_dollar_braces_with_hashmap_parallel}, }; use rustc_hash::FxHasher; - use std::{collections::HashMap, hash::BuildHasherDefault}; + use std::{collections::HashMap, hash::BuildHasherDefault, sync::{Mutex, Arc}}; #[test] fn vec() { @@ -113,7 +281,7 @@ mod tests { 2fwa${KEY3}hfasd${KEY3}fnadnfa0${KEY3}2fwah${KEY3}fasdf${KEY3}na${KEY3}${KEY3}dnfa0 2fwa${KEY4}hfasd${KEY4}fnadnfa0${KEY4}2fwah${KEY4}fasdf${KEY4}na${KEY4}${KEY4}dnfa0 2fwa${KEY5}hfasd${KEY5}fnadnfa0${KEY5}2fwah${KEY5}fasdf${KEY5}na${KEY5}${KEY5}dnfa0 - 2fwa${KEY6}hfasd${KEY6}fnadnfa0${KEY6}2fwah${KEY6}fasdf${KEY6}na${KEY6}${KEY6}dnfa0 + 2fwa${KEY6}hfasd${KEY6}fnadnfa0${KEY6}2fwah${KEY6}fa😹sdf${KEY6}na${KEY6}${KEY6}dnfa0 2fwa${KEY7}hfasd${KEY7}fnadnfa0${KEY7}2fwah${KEY7}fasdf${KEY7}na${KEY7}${KEY7}dnfa0 2fwa${KEY8}hfasd${KEY8}fnadnfa0${KEY8}2fwah${KEY8}fasdf${KEY8}na${KEY8}${KEY8}dnfa0 2fwa${KEY9}hfasd${KEY9}fnadnfa0${KEY9}2fwah${KEY9}fasdf${KEY9}na${KEY9}${KEY9}dnfa0 @@ -124,7 +292,7 @@ mod tests { 2fwavalue3hfasdvalue3fnadnfa0value32fwahvalue3fasdfvalue3navalue3value3dnfa0 2fwavalue4hfasdvalue4fnadnfa0value42fwahvalue4fasdfvalue4navalue4value4dnfa0 2fwavalue5hfasdvalue5fnadnfa0value52fwahvalue5fasdfvalue5navalue5value5dnfa0 - 2fwavalue6hfasdvalue6fnadnfa0value62fwahvalue6fasdfvalue6navalue6value6dnfa0 + 2fwavalue6hfasdvalue6fnadnfa0value62fwahvalue6fa😹sdfvalue6navalue6value6dnfa0 2fwavalue7hfasdvalue7fnadnfa0value72fwahvalue7fasdfvalue7navalue7value7dnfa0 2fwavalue8hfasdvalue8fnadnfa0value82fwahvalue8fasdfvalue8navalue8value8dnfa0 2fwavalue9hfasdvalue9fnadnfa0value92fwahvalue9fasdfvalue9navalue9value9dnfa0 @@ -153,7 +321,7 @@ mod tests { 2fwa${KEY3}hfasd${KEY3}fnadnfa0${KEY3}2fwah${KEY3}fasdf${KEY3}na${KEY3}${KEY3}dnfa0 2fwa${KEY4}hfasd${KEY4}fnadnfa0${KEY4}2fwah${KEY4}fasdf${KEY4}na${KEY4}${KEY4}dnfa0 2fwa${KEY5}hfasd${KEY5}fnadnfa0${KEY5}2fwah${KEY5}fasdf${KEY5}na${KEY5}${KEY5}dnfa0 - 2fwa${KEY6}hfasd${KEY6}fnadnfa0${KEY6}2fwah${KEY6}fasdf${KEY6}na${KEY6}${KEY6}dnfa0 + 2fwa${KEY6}hfasd${KEY6}fnadnfa0${KEY6}2fwah${KEY6}fa😹sdf${KEY6}na${KEY6}${KEY6}dnfa0 2fwa${KEY7}hfasd${KEY7}fnadnfa0${KEY7}2fwah${KEY7}fasdf${KEY7}na${KEY7}${KEY7}dnfa0 2fwa${KEY8}hfasd${KEY8}fnadnfa0${KEY8}2fwah${KEY8}fasdf${KEY8}na${KEY8}${KEY8}dnfa0 2fwa${KEY9}hfasd${KEY9}fnadnfa0${KEY9}2fwah${KEY9}fasdf${KEY9}na${KEY9}${KEY9}dnfa0 @@ -164,7 +332,7 @@ mod tests { 2fwavalue3hfasdvalue3fnadnfa0value32fwahvalue3fasdfvalue3navalue3value3dnfa0 2fwavalue4hfasdvalue4fnadnfa0value42fwahvalue4fasdfvalue4navalue4value4dnfa0 2fwavalue5hfasdvalue5fnadnfa0value52fwahvalue5fasdfvalue5navalue5value5dnfa0 - 2fwavalue6hfasdvalue6fnadnfa0value62fwahvalue6fasdfvalue6navalue6value6dnfa0 + 2fwavalue6hfasdvalue6fnadnfa0value62fwahvalue6fa😹sdfvalue6navalue6value6dnfa0 2fwavalue7hfasdvalue7fnadnfa0value72fwahvalue7fasdfvalue7navalue7value7dnfa0 2fwavalue8hfasdvalue8fnadnfa0value82fwahvalue8fasdfvalue8navalue8value8dnfa0 2fwavalue9hfasdvalue9fnadnfa0value92fwahvalue9fasdfvalue9navalue9value9dnfa0 @@ -173,4 +341,44 @@ mod tests { let ret = replace_dollar_braces_with_hashmap(&envvars, target); assert_eq!(ret.to_owned(), expected.to_owned()); } + + #[test] + fn hashmap_parallel() { + let mut envvars: HashMap> = HashMap::default(); + envvars.insert("KEY1".to_owned(), "value1".to_owned()); + envvars.insert("KEY2".to_owned(), "value2".to_owned()); + envvars.insert("KEY3".to_owned(), "value3".to_owned()); + envvars.insert("KEY4".to_owned(), "value4".to_owned()); + envvars.insert("KEY5".to_owned(), "value5".to_owned()); + envvars.insert("KEY6".to_owned(), "value6".to_owned()); + envvars.insert("KEY7".to_owned(), "value7".to_owned()); + envvars.insert("KEY8".to_owned(), "value8".to_owned()); + envvars.insert("KEY9".to_owned(), "value9".to_owned()); + envvars.insert("KEY10".to_owned(), "value10".to_owned()); + let target = + "2fwa${KEY1}hfasd${KEY1}fnadnfa0${KEY1}2fwah${KEY1}fasdf${KEY1}na${KEY1}${KEY1}dnfa0 + 2fwa${KEY2}hfasd${KEY2}fnadnfa0${KEY2}2fwah${KEY2}fasdf${KEY2}na${KEY2}${KEY2}dnfa0 + 2fwa${KEY3}hfasd${KEY3}fnadnfa0${KEY3}2fwah${KEY3}fasdf${KEY3}na${KEY3}${KEY3}dnfa0 + 2fwa${KEY4}hfasd${KEY4}fnadnfa0${KEY4}2fwah${KEY4}fasdf${KEY4}na${KEY4}${KEY4}dnfa0 + 2fwa${KEY5}hfasd${KEY5}fnadnfa0${KEY5}2fwah${KEY5}fasdf${KEY5}na${KEY5}${KEY5}dnfa0 + 2fwa${KEY6}hfasd${KEY6}fnadnfa0${KEY6}2fwah${KEY6}fa😹sdf${KEY6}na${KEY6}${KEY6}dnfa0 + 2fwa${KEY7}hfasd${KEY7}fnadnfa0${KEY7}2fwah${KEY7}fasdf${KEY7}na${KEY7}${KEY7}dnfa0 + 2fwa${KEY8}hfasd${KEY8}fnadnfa0${KEY8}2fwah${KEY8}fasdf${KEY8}na${KEY8}${KEY8}dnfa0 + 2fwa${KEY9}hfasd${KEY9}fnadnfa0${KEY9}2fwah${KEY9}fasdf${KEY9}na${KEY9}${KEY9}dnfa0 + 2fwah${KEY10}fasd${KEY10}fnadnfa0${{$${KEY10}2fwa${KEY10}hfasd${KEY10}fn${KEY10}${KEY10}adnfa"; + let expected = + "2fwavalue1hfasdvalue1fnadnfa0value12fwahvalue1fasdfvalue1navalue1value1dnfa0 + 2fwavalue2hfasdvalue2fnadnfa0value22fwahvalue2fasdfvalue2navalue2value2dnfa0 + 2fwavalue3hfasdvalue3fnadnfa0value32fwahvalue3fasdfvalue3navalue3value3dnfa0 + 2fwavalue4hfasdvalue4fnadnfa0value42fwahvalue4fasdfvalue4navalue4value4dnfa0 + 2fwavalue5hfasdvalue5fnadnfa0value52fwahvalue5fasdfvalue5navalue5value5dnfa0 + 2fwavalue6hfasdvalue6fnadnfa0value62fwahvalue6fa😹sdfvalue6navalue6value6dnfa0 + 2fwavalue7hfasdvalue7fnadnfa0value72fwahvalue7fasdfvalue7navalue7value7dnfa0 + 2fwavalue8hfasdvalue8fnadnfa0value82fwahvalue8fasdfvalue8navalue8value8dnfa0 + 2fwavalue9hfasdvalue9fnadnfa0value92fwahvalue9fasdfvalue9navalue9value9dnfa0 + 2fwahvalue10fasdvalue10fnadnfa0${{$value102fwavalue10hfasdvalue10fnvalue10value10adnfa"; + + let ret = replace_dollar_braces_with_hashmap_parallel(&envvars, target.to_owned(), 24); + assert_eq!(ret.to_owned(), expected.to_owned()); + } }