pyo3/conversions/std/
osstr.rs

1use crate::instance::Bound;
2use crate::types::any::PyAnyMethods;
3use crate::types::PyString;
4use crate::{ffi, FromPyObject, IntoPy, PyAny, PyObject, PyResult, Python, ToPyObject};
5use std::borrow::Cow;
6use std::ffi::{OsStr, OsString};
7
8impl ToPyObject for OsStr {
9    fn to_object(&self, py: Python<'_>) -> PyObject {
10        // If the string is UTF-8, take the quick and easy shortcut
11        if let Some(valid_utf8_path) = self.to_str() {
12            return valid_utf8_path.to_object(py);
13        }
14
15        // All targets besides windows support the std::os::unix::ffi::OsStrExt API:
16        // https://doc.rust-lang.org/src/std/sys_common/mod.rs.html#59
17        #[cfg(not(windows))]
18        {
19            #[cfg(target_os = "wasi")]
20            let bytes = std::os::wasi::ffi::OsStrExt::as_bytes(self);
21            #[cfg(not(target_os = "wasi"))]
22            let bytes = std::os::unix::ffi::OsStrExt::as_bytes(self);
23
24            let ptr = bytes.as_ptr().cast();
25            let len = bytes.len() as ffi::Py_ssize_t;
26            unsafe {
27                // DecodeFSDefault automatically chooses an appropriate decoding mechanism to
28                // parse os strings losslessly (i.e. surrogateescape most of the time)
29                let pystring = ffi::PyUnicode_DecodeFSDefaultAndSize(ptr, len);
30                PyObject::from_owned_ptr(py, pystring)
31            }
32        }
33
34        #[cfg(windows)]
35        {
36            let wstr: Vec<u16> = std::os::windows::ffi::OsStrExt::encode_wide(self).collect();
37
38            unsafe {
39                // This will not panic because the data from encode_wide is well-formed Windows
40                // string data
41                PyObject::from_owned_ptr(
42                    py,
43                    ffi::PyUnicode_FromWideChar(wstr.as_ptr(), wstr.len() as ffi::Py_ssize_t),
44                )
45            }
46        }
47    }
48}
49
// There is no `FromPyObject` implementation for `&OsStr`: although it would be possible on Unix,
// it cannot be implemented on Windows, so it is omitted on every platform.
52
53impl FromPyObject<'_> for OsString {
54    fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult<Self> {
55        let pystring = ob.downcast::<PyString>()?;
56
57        #[cfg(not(windows))]
58        {
59            // Decode from Python's lossless bytes string representation back into raw bytes
60            let fs_encoded_bytes = unsafe {
61                crate::Py::<crate::types::PyBytes>::from_owned_ptr(
62                    ob.py(),
63                    ffi::PyUnicode_EncodeFSDefault(pystring.as_ptr()),
64                )
65            };
66
67            // Create an OsStr view into the raw bytes from Python
68            #[cfg(target_os = "wasi")]
69            let os_str: &OsStr =
70                std::os::wasi::ffi::OsStrExt::from_bytes(fs_encoded_bytes.as_bytes(ob.py()));
71            #[cfg(not(target_os = "wasi"))]
72            let os_str: &OsStr =
73                std::os::unix::ffi::OsStrExt::from_bytes(fs_encoded_bytes.as_bytes(ob.py()));
74
75            Ok(os_str.to_os_string())
76        }
77
78        #[cfg(windows)]
79        {
80            use crate::types::string::PyStringMethods;
81
82            // Take the quick and easy shortcut if UTF-8
83            if let Ok(utf8_string) = pystring.to_cow() {
84                return Ok(utf8_string.into_owned().into());
85            }
86
87            // Get an owned allocated wide char buffer from PyString, which we have to deallocate
88            // ourselves
89            let size =
90                unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), std::ptr::null_mut(), 0) };
91            crate::err::error_on_minusone(ob.py(), size)?;
92
93            let mut buffer = vec![0; size as usize];
94            let bytes_read =
95                unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), buffer.as_mut_ptr(), size) };
96            assert_eq!(bytes_read, size);
97
98            // Copy wide char buffer into OsString
99            let os_string = std::os::windows::ffi::OsStringExt::from_wide(&buffer);
100
101            Ok(os_string)
102        }
103    }
104}
105
106impl IntoPy<PyObject> for &'_ OsStr {
107    #[inline]
108    fn into_py(self, py: Python<'_>) -> PyObject {
109        self.to_object(py)
110    }
111}
112
113impl ToPyObject for Cow<'_, OsStr> {
114    #[inline]
115    fn to_object(&self, py: Python<'_>) -> PyObject {
116        (self as &OsStr).to_object(py)
117    }
118}
119
120impl IntoPy<PyObject> for Cow<'_, OsStr> {
121    #[inline]
122    fn into_py(self, py: Python<'_>) -> PyObject {
123        self.to_object(py)
124    }
125}
126
127impl ToPyObject for OsString {
128    #[inline]
129    fn to_object(&self, py: Python<'_>) -> PyObject {
130        (self as &OsStr).to_object(py)
131    }
132}
133
134impl IntoPy<PyObject> for OsString {
135    fn into_py(self, py: Python<'_>) -> PyObject {
136        self.to_object(py)
137    }
138}
139
140impl<'a> IntoPy<PyObject> for &'a OsString {
141    fn into_py(self, py: Python<'_>) -> PyObject {
142        self.to_object(py)
143    }
144}
145
#[cfg(test)]
mod tests {
    use crate::types::{PyAnyMethods, PyStringMethods};
    use crate::{types::PyString, IntoPy, PyObject, Python, ToPyObject};
    use std::fmt::Debug;
    use std::{
        borrow::Cow,
        ffi::{OsStr, OsString},
    };

    /// Bytes that are not valid UTF-8 must survive a trip into Python and back unchanged.
    #[test]
    #[cfg(not(windows))]
    fn test_non_utf8_conversion() {
        #[cfg(not(target_os = "wasi"))]
        use std::os::unix::ffi::OsStrExt;
        #[cfg(target_os = "wasi")]
        use std::os::wasi::ffi::OsStrExt;

        // Deliberately not valid UTF-8.
        let raw: &[u8] = &[250, 251, 252, 253, 254, 255, 0, 255];
        let original = OsStr::from_bytes(raw);

        Python::with_gil(|py| {
            // Round-trip through Python and compare against the input.
            let as_python: PyObject = original.into_py(py);
            let recovered: OsString = as_python.extract(py).unwrap();
            assert_eq!(original, recovered);
        });
    }

    /// Every `ToPyObject` implementation in this module must produce a `str` that converts back
    /// to the same OS string.
    #[test]
    fn test_topyobject_roundtrip() {
        fn check<T>(py: Python<'_>, value: T)
        where
            T: ToPyObject + AsRef<OsStr> + Debug,
        {
            let as_python = value.to_object(py);
            let py_string = as_python.downcast_bound::<PyString>(py).unwrap();
            assert_eq!(
                py_string.to_string_lossy(),
                value.as_ref().to_string_lossy()
            );

            let recovered: OsString = py_string.extract().unwrap();
            assert_eq!(value.as_ref(), recovered.as_os_str());
        }

        Python::with_gil(|py| {
            let sample = OsStr::new("Hello\0\nšŸ");
            check::<&OsStr>(py, sample);
            check::<Cow<'_, OsStr>>(py, Cow::Borrowed(sample));
            check::<Cow<'_, OsStr>>(py, Cow::Owned(sample.to_os_string()));
            check::<OsString>(py, sample.to_os_string());
        });
    }

    /// Every `IntoPy<PyObject>` implementation in this module must produce a `str` that
    /// converts back to the same OS string.
    #[test]
    fn test_intopy_roundtrip() {
        fn check<T>(py: Python<'_>, value: T)
        where
            T: IntoPy<PyObject> + AsRef<OsStr> + Debug + Clone,
        {
            // `into_py` consumes its receiver, so convert a clone and keep `value` to compare.
            let as_python = value.clone().into_py(py);
            let py_string = as_python.downcast_bound::<PyString>(py).unwrap();
            assert_eq!(
                py_string.to_string_lossy(),
                value.as_ref().to_string_lossy()
            );

            let recovered: OsString = py_string.extract().unwrap();
            assert!(value.as_ref() == recovered.as_os_str());
        }

        Python::with_gil(|py| {
            let sample = OsStr::new("Hello\0\nšŸ");
            check::<&OsStr>(py, sample);
            check::<OsString>(py, sample.to_os_string());
            check::<&OsString>(py, &sample.to_os_string());
        });
    }
}