diff --git a/README.md b/README.md
index 3ad6b9e..1284ede 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,11 @@
 
 A minimal shell written in Rust.
 
+## Features
+
+- Runs shebang scripts on Windows, including `#!/usr/bin/env node` and `env -S`.
+- Accepts Windows drive paths written as either `C:\Users\name` or `/c/Users/name`.
+
 ## Requirements
 
 - https://github.com/microsoft/coreutils
diff --git a/src/lib.rs b/src/lib.rs
index e4cbc2f..1aa07e1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,6 +2,8 @@ mod commands;
 mod parser;
 mod path;
 mod runtime;
+#[cfg(windows)]
+mod shebang;
 
 pub use runtime::{RunOptions, Shell};
 
diff --git a/src/path.rs b/src/path.rs
index a5f1199..eea5d3a 100644
--- a/src/path.rs
+++ b/src/path.rs
@@ -2,7 +2,7 @@ use std::path::PathBuf;
 
 pub(crate) fn shell_path(path: &str) -> PathBuf {
     #[cfg(windows)]
-    if let Some(path) = msys_drive_path(path) {
+    if let Some(path) = slash_drive_path(path) {
         return path;
     }
 
@@ -15,7 +15,7 @@ pub(crate) fn is_explicit_path(path: &str) -> bool {
 }
 
 #[cfg(windows)]
-fn msys_drive_path(path: &str) -> Option<PathBuf> {
+fn slash_drive_path(path: &str) -> Option<PathBuf> {
     let path = path.replace('\\', "/");
     let mut chars = path.chars();
 
@@ -59,13 +59,13 @@ mod tests {
 
     #[cfg(windows)]
     #[test]
-    fn converts_msys_drive_root() {
+    fn converts_slash_drive_root() {
         assert_eq!(shell_path("/c"), PathBuf::from("C:\\"));
     }
 
     #[cfg(windows)]
     #[test]
-    fn converts_msys_drive_path() {
+    fn converts_slash_drive_path() {
         assert_eq!(
             shell_path("/c/Users/test"),
             PathBuf::from("C:\\Users\\test")
diff --git a/src/runtime.rs b/src/runtime.rs
index e3a17ad..d90b04a 100644
--- a/src/runtime.rs
+++ b/src/runtime.rs
@@ -1,6 +1,8 @@
 use crate::commands;
 use crate::parser::{Command as AstCommand, ListItem, Pipeline, RedirectKind, Word, parse};
 use crate::path::{is_explicit_path, shell_path};
+#[cfg(windows)]
+use crate::shebang;
 use anyhow::{Context, Result, bail};
 use std::collections::HashMap;
 use std::env;
@@ -359,7 +361,7 @@ impl Shell {
 
     fn external_command(&self, spec: &ExternalCommandSpec) -> Result<Command> {
         let program = resolve_program(&spec.name, &self.vars)?;
-        let mut command = Command::new(program);
+        let mut command = command_for_program(&program, &self.vars)?;
         command.args(&spec.argv);
         command.envs(&self.vars);
         command.envs(&spec.env_overlay);
@@ -460,6 +462,30 @@ struct ExternalCommandSpec {
     ast: AstCommand,
 }
 
+/// Builds the `Command` that launches `program`.
+///
+/// On Windows, a shebang script is started through its interpreter, resolved
+/// like any other command, with the script path as the next argument. All
+/// other programs are spawned directly.
+fn command_for_program(program: &Path, vars: &HashMap<String, String>) -> Result<Command> {
+    #[cfg(windows)]
+    if shebang::is_candidate(program)
+        && let Some(shebang) = shebang::read(program)?
+    {
+        let interpreter = resolve_program(&shebang.program, vars)
+            .with_context(|| format!("shebang interpreter not found: {}", shebang.program))?;
+        let mut command = Command::new(interpreter);
+        command.args(shebang.args);
+        command.arg(shebang::script_path(program));
+        return Ok(command);
+    }
+
+    #[cfg(not(windows))]
+    let _ = vars;
+
+    Ok(Command::new(program))
+}
+
 fn wait_status(child: Child) -> Result<i32> {
     Ok(child.wait_with_output()?.status.code().unwrap_or(1))
 }
@@ -759,7 +785,7 @@ mod tests {
 
     #[cfg(windows)]
     #[test]
-    fn cd_accepts_msys_drive_path() {
+    fn cd_accepts_slash_drive_path() {
         let current_dir = env::current_dir().unwrap();
         let drive = current_dir
             .display()
diff --git a/src/shebang.rs b/src/shebang.rs
new file mode 100644
index 0000000..931c2a8
--- /dev/null
+++ b/src/shebang.rs
@@ -0,0 +1,310 @@
+//! Windows shebang support for non-native executable files.
+//!
+//! Shebangs are not specified by POSIX. On Unix-like systems they are handled
+//! by the kernel as part of `execve`, commonly using this form:
+//!
+//! ```text
+//! #!interpreter [optional-arg]
+//! ```
+//!
+//! The optional argument is passed to the interpreter as one argument. It is
+//! not generally split on whitespace. The resulting invocation is equivalent
+//! to:
+//!
+//! ```text
+//! interpreter [optional-arg] script-path original-args...
+//! ```
+//!
+//! Windows `CreateProcess` does not interpret shebangs, so shell performs that
+//! step before spawning a non-native file. Files ending in `.com`, `.exe`,
+//! `.bat`, or `.cmd` remain native Windows programs and bypass this module.
+//!
+//! `/usr/bin/env command` is treated as a portability request and resolves
+//! `command` through the Windows `PATH` and `PATHEXT`. The non-POSIX `env -S`
+//! extension is also supported for scripts that intentionally require
+//! multiple interpreter arguments. Without `-S`, the complete optional
+//! argument remains one command name, matching common Unix shebang behavior.
+//!
+//! Parsing is intentionally limited to the first line and rejects shebang
+//! lines over [`MAX_LINE_LENGTH`] bytes. A shebang must begin with `#!` at byte
+//! zero; UTF-8 BOM-prefixed files are not treated as shebang scripts.
+
+use anyhow::{Result, bail};
+use std::fs::File;
+use std::io::Read;
+use std::path::{Path, PathBuf};
+
+/// Maximum shebang line length in bytes, not counting the terminating `\n`.
+const MAX_LINE_LENGTH: usize = 4096;
+
+/// Interpreter command line extracted from a shebang.
+///
+/// The caller appends the script path and the script's own arguments.
+#[derive(Debug, PartialEq, Eq)]
+pub(crate) struct Invocation {
+    /// Interpreter name or path taken from the shebang, still unresolved.
+    pub program: String,
+    /// Arguments that precede the script path.
+    pub args: Vec<String>,
+}
+
+/// Returns whether `path` is an existing file without a native Windows extension.
+pub(crate) fn is_candidate(path: &Path) -> bool {
+    path.is_file() && !has_windows_native_extension(path)
+}
+
+/// Returns `path` with every `/` replaced by `\`, converting lossily to UTF-8.
+pub(crate) fn script_path(path: &Path) -> PathBuf {
+    PathBuf::from(path.to_string_lossy().replace('/', "\\"))
+}
+
+/// Returns whether the extension is `com`, `exe`, `bat`, or `cmd` in any ASCII case.
+fn has_windows_native_extension(path: &Path) -> bool {
+    path.extension()
+        .and_then(|extension| extension.to_str())
+        .is_some_and(|extension| {
+            matches!(
+                extension.to_ascii_lowercase().as_str(),
+                "com" | "exe" | "bat" | "cmd"
+            )
+        })
+}
+
+/// Reads the first line of `path` and parses it as a shebang.
+///
+/// Returns `Ok(None)` when the file does not start with `#!`, when its first
+/// line is not valid UTF-8, or when the shebang names no interpreter.
+///
+/// # Errors
+///
+/// Fails when the file cannot be opened or read, when a shebang line exceeds
+/// [`MAX_LINE_LENGTH`] bytes, or when an `env` shebang is malformed.
+pub(crate) fn read(path: &Path) -> Result<Option<Invocation>> {
+    // A single `read` call may return fewer bytes than requested, so cap the
+    // input with `take` and let `read_to_end` keep reading up to that cap.
+    let mut bytes = Vec::with_capacity(MAX_LINE_LENGTH + 1);
+    File::open(path)?
+        .take(MAX_LINE_LENGTH as u64 + 1)
+        .read_to_end(&mut bytes)?;
+
+    // Only shebang scripts are subject to the length limit. Any other file
+    // with a long first line (a binary, minified source) is not an error here.
+    if !bytes.starts_with(b"#!") {
+        return Ok(None);
+    }
+
+    let line_end = match bytes.iter().position(|byte| *byte == b'\n') {
+        Some(line_end) => line_end,
+        None if bytes.len() > MAX_LINE_LENGTH => {
+            bail!("shebang line exceeds {MAX_LINE_LENGTH} bytes")
+        }
+        None => bytes.len(),
+    };
+    let Ok(line) = std::str::from_utf8(&bytes[..line_end]) else {
+        return Ok(None);
+    };
+    parse(line)
+}
+
+/// Parses one line as a shebang, returning `None` unless it starts with `#!`
+/// and names an interpreter.
+///
+/// Everything after the interpreter is kept as a single argument, except for
+/// `env`, whose argument is handled by `parse_env`.
+fn parse(line: &str) -> Result<Option<Invocation>> {
+    let command = match line.strip_prefix("#!") {
+        Some(command) => command.trim_matches([' ', '\t', '\r']),
+        None => return Ok(None),
+    };
+    if command.is_empty() {
+        return Ok(None);
+    }
+
+    let split = command.find([' ', '\t']).unwrap_or(command.len());
+    let interpreter = &command[..split];
+    let optional_arg = command[split..].trim_matches([' ', '\t']);
+
+    if is_env(interpreter) {
+        return parse_env(optional_arg).map(Some);
+    }
+
+    let args = if optional_arg.is_empty() {
+        Vec::new()
+    } else {
+        vec![optional_arg.to_string()]
+    };
+    Ok(Some(Invocation {
+        program: interpreter.to_string(),
+        args,
+    }))
+}
+
+/// Returns whether the interpreter's file name is `env`, ignoring ASCII case.
+fn is_env(interpreter: &str) -> bool {
+    Path::new(interpreter)
+        .file_name()
+        .and_then(|name| name.to_str())
+        .is_some_and(|name| name.eq_ignore_ascii_case("env"))
+}
+
+/// Interprets the argument of an `env` shebang.
+///
+/// With a leading `-S` the rest is split into words by `split_env_s`; otherwise
+/// the whole argument is taken as the command name.
+fn parse_env(argument: &str) -> Result<Invocation> {
+    if argument.is_empty() {
+        bail!("env shebang is missing a command");
+    }
+
+    if let Some(command) = argument.strip_prefix("-S") {
+        let mut words = split_env_s(command.trim_start())?;
+        if words.is_empty() {
+            bail!("env -S shebang is missing a command");
+        }
+        return Ok(Invocation {
+            program: words.remove(0),
+            args: words,
+        });
+    }
+
+    Ok(Invocation {
+        program: argument.to_string(),
+        args: Vec::new(),
+    })
+}
+
+/// Splits an `env -S` string into words on unquoted ASCII whitespace.
+///
+/// Single and double quotes group text, and a backslash outside quotes or
+/// inside double quotes makes the next character literal.
+fn split_env_s(input: &str) -> Result<Vec<String>> {
+    let mut words = Vec::new();
+    let mut word = String::new();
+    let mut chars = input.chars().peekable();
+    let mut quote = None;
+    let mut started = false;
+
+    while let Some(ch) = chars.next() {
+        match (quote, ch) {
+            (None, '\'' | '"') => {
+                quote = Some(ch);
+                started = true;
+            }
+            (Some(current), ch) if ch == current => quote = None,
+            (None, '\\') | (Some('"'), '\\') => {
+                let Some(next) = chars.next() else {
+                    bail!("env -S shebang ends with an escape");
+                };
+                word.push(next);
+                started = true;
+            }
+            (None, ch) if ch.is_ascii_whitespace() => {
+                if started {
+                    words.push(std::mem::take(&mut word));
+                    started = false;
+                }
+            }
+            _ => {
+                word.push(ch);
+                started = true;
+            }
+        }
+    }
+
+    if quote.is_some() {
+        bail!("env -S shebang has an unterminated quote");
+    }
+    if started {
+        words.push(word);
+    }
+    Ok(words)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parses_interpreter_and_one_optional_argument() {
+        assert_eq!(
+            parse("#!/usr/bin/node --no-warnings --trace-warnings\r").unwrap(),
+            Some(Invocation {
+                program: "/usr/bin/node".into(),
+                args: vec!["--no-warnings --trace-warnings".into()],
+            })
+        );
+    }
+
+    #[test]
+    fn parses_env_command() {
+        assert_eq!(
+            parse("#!/usr/bin/env node").unwrap(),
+            Some(Invocation {
+                program: "node".into(),
+                args: Vec::new(),
+            })
+        );
+    }
+
+    #[test]
+    fn keeps_unsplit_env_argument_as_one_command_name() {
+        assert_eq!(
+            parse("#!/usr/bin/env node --no-warnings").unwrap(),
+            Some(Invocation {
+                program: "node --no-warnings".into(),
+                args: Vec::new(),
+            })
+        );
+    }
+
+    #[test]
+    fn parses_env_split_string() {
+        assert_eq!(
+            parse("#!/usr/bin/env -S node --eval 'console.log(\"hello world\")'").unwrap(),
+            Some(Invocation {
+                program: "node".into(),
+                args: vec!["--eval".into(), "console.log(\"hello world\")".into(),],
+            })
+        );
+    }
+
+    #[test]
+    fn rejects_invalid_env_split_string() {
+        assert!(parse("#!/usr/bin/env -S node 'unterminated").is_err());
+    }
+
+    #[test]
+    fn ignores_non_shebang_lines() {
+        assert_eq!(parse("\u{feff}#!/usr/bin/env node").unwrap(), None);
+        assert_eq!(parse("echo hello").unwrap(), None);
+    }
+
+    #[test]
+    fn keeps_windows_native_programs_direct() {
+        assert!(has_windows_native_extension(Path::new("tool.exe")));
+        assert!(has_windows_native_extension(Path::new("tool.CMD")));
+        assert!(has_windows_native_extension(Path::new("tool.bat")));
+        assert!(!has_windows_native_extension(Path::new("tool.ts")));
+        assert!(!has_windows_native_extension(Path::new("tool")));
+    }
+
+    #[test]
+    fn ignores_long_first_line_without_shebang() {
+        let path = std::env::temp_dir().join("shell-shebang-long-line-test.bin");
+        std::fs::write(&path, vec![b'a'; MAX_LINE_LENGTH + 1]).unwrap();
+        let result = read(&path);
+        std::fs::remove_file(&path).unwrap();
+        assert_eq!(result.unwrap(), None);
+    }
+
+    #[test]
+    fn rejects_overlong_shebang_line() {
+        let path = std::env::temp_dir().join("shell-shebang-overlong-test.sh");
+        let mut source = b"#!".to_vec();
+        source.resize(MAX_LINE_LENGTH + 1, b'a');
+        std::fs::write(&path, source).unwrap();
+        let result = read(&path);
+        std::fs::remove_file(&path).unwrap();
+        assert!(result.is_err());
+    }
+}
diff --git a/tests/cli.rs b/tests/cli.rs
index 3d892d9..87da4a3 100644
--- a/tests/cli.rs
+++ b/tests/cli.rs
@@ -34,8 +34,8 @@ fn supports_command_substitution() {
 
 #[cfg(windows)]
 #[test]
-fn runs_script_from_msys_style_drive_path() {
-    let script = std::env::temp_dir().join("shell-msys-path-test.sh");
+fn runs_script_from_slash_drive_path() {
+    let script = std::env::temp_dir().join("shell-slash-drive-path-test.sh");
     std::fs::write(&script, "echo script\n").unwrap();
     let script = script.display().to_string().replace('\\', "/");
     let script = format!(
@@ -53,6 +53,27 @@ fn runs_script_from_msys_style_drive_path() {
     assert_eq!(String::from_utf8_lossy(&output.stdout), "script\n");
 }
 
+#[cfg(windows)]
+#[test]
+fn runs_script_using_shebang() {
+    let script = std::env::temp_dir().join("shell-shebang-test.ts");
+    let source = "#!/usr/bin/env node\r\nconsole.log(\"shebang works\");\r\n";
+    std::fs::write(&script, source).unwrap();
+    let script = script.display().to_string().replace('\\', "/");
+
+    let output = Command::new(env!("CARGO_BIN_EXE_shell"))
+        .args(["-c", &format!("\"{script}\"")])
+        .output()
+        .unwrap();
+
+    assert!(
+        output.status.success(),
+        "{}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+    assert_eq!(String::from_utf8_lossy(&output.stdout), "shebang works\n");
+}
+
 #[test]
 fn failed_cd_does_not_run_argument_as_command() {
     let output = Command::new(env!("CARGO_BIN_EXE_shell"))