1#[cfg(not(any(PyPy, GraalPy)))]
2use crate::Py_hash_t;
3use crate::{PyObject, Py_UCS1, Py_UCS2, Py_UCS4, Py_UNICODE, Py_ssize_t};
4#[cfg(not(any(Py_3_12, GraalPy)))]
5use libc::wchar_t;
6use std::os::raw::{c_char, c_int, c_uint, c_void};
7
/// Raw storage for a C bitfield together with bit-granular accessors.
///
/// Bit positions are interpreted per target endianness: on big-endian targets the
/// bits inside a byte (and inside a multi-bit field) are numbered from the most
/// significant end, on little-endian targets from the least significant end.
#[repr(C)]
struct BitfieldUnit<Storage> {
    storage: Storage,
}

impl<Storage> BitfieldUnit<Storage> {
    /// Wraps `storage` as-is.
    #[inline]
    pub const fn new(storage: Storage) -> Self {
        Self { storage }
    }
}

#[cfg(not(GraalPy))]
impl<Storage> BitfieldUnit<Storage>
where
    Storage: AsRef<[u8]> + AsMut<[u8]>,
{
    /// Shift, within its containing byte, of the bit with absolute index `index`.
    #[inline]
    fn shift_in_byte(index: usize) -> usize {
        let bit = index % 8;
        if cfg!(target_endian = "big") {
            7 - bit
        } else {
            bit
        }
    }

    /// Position inside the field's integer value that corresponds to the `i`-th
    /// storage bit of a `width`-bit field (requires `i < width`).
    #[inline]
    fn value_bit(i: usize, width: usize) -> usize {
        if cfg!(target_endian = "big") {
            width - 1 - i
        } else {
            i
        }
    }

    /// Returns `true` when the bit at absolute index `index` is set.
    ///
    /// Panics if `index / 8` is outside the storage.
    #[inline]
    fn get_bit(&self, index: usize) -> bool {
        debug_assert!(index / 8 < self.storage.as_ref().len());
        let byte = self.storage.as_ref()[index / 8];
        (byte >> Self::shift_in_byte(index)) & 1 == 1
    }

    /// Sets (`val == true`) or clears (`val == false`) the bit at absolute index `index`.
    ///
    /// Panics if `index / 8` is outside the storage.
    #[inline]
    fn set_bit(&mut self, index: usize, val: bool) {
        debug_assert!(index / 8 < self.storage.as_ref().len());
        let mask = 1u8 << Self::shift_in_byte(index);
        let slot = &mut self.storage.as_mut()[index / 8];
        *slot = if val { *slot | mask } else { *slot & !mask };
    }

    /// Reads the `bit_width`-bit field that starts at `bit_offset` and returns it
    /// zero-extended to `u64`.
    #[inline]
    fn get(&self, bit_offset: usize, bit_width: u8) -> u64 {
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
        debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len());
        let width = usize::from(bit_width);
        (0..width)
            .filter(|&i| self.get_bit(bit_offset + i))
            .fold(0, |acc: u64, i| acc | (1 << Self::value_bit(i, width)))
    }

    /// Writes the low `bit_width` bits of `val` into the field that starts at
    /// `bit_offset`; higher bits of `val` are ignored and neighbouring fields are
    /// left untouched.
    #[inline]
    fn set(&mut self, bit_offset: usize, bit_width: u8, val: u64) {
        debug_assert!(bit_width <= 64);
        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
        debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len());
        let width = usize::from(bit_width);
        for i in 0..width {
            let bit_is_set = val & (1 << i) != 0;
            self.set_bit(bit_offset + Self::value_bit(i, width), bit_is_set);
        }
    }
}
120
// Layout of the fields packed into the `state` bitfield, lowest offset first.
// Every `*_INDEX` is the bit offset of a field and every `*_WIDTH` its size in
// bits; each field starts right where the previous one ends.

/// Bit offset of the `interned` field.
#[cfg(not(GraalPy))]
const STATE_INTERNED_INDEX: usize = 0;
/// Width in bits of the `interned` field.
#[cfg(not(GraalPy))]
const STATE_INTERNED_WIDTH: u8 = 2;

/// Bit offset of the `kind` field.
#[cfg(not(GraalPy))]
const STATE_KIND_INDEX: usize = STATE_INTERNED_INDEX + STATE_INTERNED_WIDTH as usize;
/// Width in bits of the `kind` field.
#[cfg(not(GraalPy))]
const STATE_KIND_WIDTH: u8 = 3;

/// Bit offset of the `compact` flag.
#[cfg(not(GraalPy))]
const STATE_COMPACT_INDEX: usize = STATE_KIND_INDEX + STATE_KIND_WIDTH as usize;
/// Width in bits of the `compact` flag.
#[cfg(not(GraalPy))]
const STATE_COMPACT_WIDTH: u8 = 1;

/// Bit offset of the `ascii` flag.
#[cfg(not(GraalPy))]
const STATE_ASCII_INDEX: usize = STATE_COMPACT_INDEX + STATE_COMPACT_WIDTH as usize;
/// Width in bits of the `ascii` flag.
#[cfg(not(GraalPy))]
const STATE_ASCII_WIDTH: u8 = 1;

/// Bit offset of the `ready` flag (only defined before Python 3.12).
#[cfg(not(any(Py_3_12, GraalPy)))]
const STATE_READY_INDEX: usize = STATE_ASCII_INDEX + STATE_ASCII_WIDTH as usize;
/// Width in bits of the `ready` flag (only defined before Python 3.12).
#[cfg(not(any(Py_3_12, GraalPy)))]
const STATE_READY_WIDTH: u8 = 1;
147
148#[repr(C)]
158#[repr(align(4))]
159struct PyASCIIObjectState {
160 bitfield_align: [u8; 0],
161 bitfield: BitfieldUnit<[u8; 4usize]>,
162}
163
164#[cfg(not(GraalPy))]
166#[allow(clippy::useless_transmute)]
167impl PyASCIIObjectState {
168 #[inline]
169 unsafe fn interned(&self) -> c_uint {
170 std::mem::transmute(
171 self.bitfield
172 .get(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH) as u32,
173 )
174 }
175
176 #[inline]
177 unsafe fn set_interned(&mut self, val: c_uint) {
178 let val: u32 = std::mem::transmute(val);
179 self.bitfield
180 .set(STATE_INTERNED_INDEX, STATE_INTERNED_WIDTH, val as u64)
181 }
182
183 #[inline]
184 unsafe fn kind(&self) -> c_uint {
185 std::mem::transmute(self.bitfield.get(STATE_KIND_INDEX, STATE_KIND_WIDTH) as u32)
186 }
187
188 #[inline]
189 unsafe fn set_kind(&mut self, val: c_uint) {
190 let val: u32 = std::mem::transmute(val);
191 self.bitfield
192 .set(STATE_KIND_INDEX, STATE_KIND_WIDTH, val as u64)
193 }
194
195 #[inline]
196 unsafe fn compact(&self) -> c_uint {
197 std::mem::transmute(self.bitfield.get(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH) as u32)
198 }
199
200 #[inline]
201 unsafe fn set_compact(&mut self, val: c_uint) {
202 let val: u32 = std::mem::transmute(val);
203 self.bitfield
204 .set(STATE_COMPACT_INDEX, STATE_COMPACT_WIDTH, val as u64)
205 }
206
207 #[inline]
208 unsafe fn ascii(&self) -> c_uint {
209 std::mem::transmute(self.bitfield.get(STATE_ASCII_INDEX, STATE_ASCII_WIDTH) as u32)
210 }
211
212 #[inline]
213 unsafe fn set_ascii(&mut self, val: c_uint) {
214 let val: u32 = std::mem::transmute(val);
215 self.bitfield
216 .set(STATE_ASCII_INDEX, STATE_ASCII_WIDTH, val as u64)
217 }
218
219 #[cfg(not(Py_3_12))]
220 #[inline]
221 unsafe fn ready(&self) -> c_uint {
222 std::mem::transmute(self.bitfield.get(STATE_READY_INDEX, STATE_READY_WIDTH) as u32)
223 }
224
225 #[cfg(not(Py_3_12))]
226 #[inline]
227 unsafe fn set_ready(&mut self, val: c_uint) {
228 let val: u32 = std::mem::transmute(val);
229 self.bitfield
230 .set(STATE_READY_INDEX, STATE_READY_WIDTH, val as u64)
231 }
232}
233
234impl From<u32> for PyASCIIObjectState {
235 #[inline]
236 fn from(value: u32) -> Self {
237 PyASCIIObjectState {
238 bitfield_align: [],
239 bitfield: BitfieldUnit::new(value.to_ne_bytes()),
240 }
241 }
242}
243
244impl From<PyASCIIObjectState> for u32 {
245 #[inline]
246 fn from(value: PyASCIIObjectState) -> Self {
247 u32::from_ne_bytes(value.bitfield.storage)
248 }
249}
250
251#[repr(C)]
252pub struct PyASCIIObject {
253 pub ob_base: PyObject,
254 #[cfg(not(GraalPy))]
255 pub length: Py_ssize_t,
256 #[cfg(not(any(PyPy, GraalPy)))]
257 pub hash: Py_hash_t,
258 #[cfg(not(GraalPy))]
270 pub state: u32,
271 #[cfg(not(any(Py_3_12, GraalPy)))]
272 pub wstr: *mut wchar_t,
273}
274
275#[cfg(not(GraalPy))]
277impl PyASCIIObject {
278 #[cfg_attr(not(Py_3_12), allow(rustdoc::broken_intra_doc_links))] #[inline]
284 pub unsafe fn interned(&self) -> c_uint {
285 PyASCIIObjectState::from(self.state).interned()
286 }
287
288 #[cfg_attr(not(Py_3_12), allow(rustdoc::broken_intra_doc_links))] #[inline]
295 pub unsafe fn set_interned(&mut self, val: c_uint) {
296 let mut state = PyASCIIObjectState::from(self.state);
297 state.set_interned(val);
298 self.state = u32::from(state);
299 }
300
301 #[cfg_attr(not(Py_3_12), doc = "[`PyUnicode_WCHAR_KIND`], ")]
305 #[inline]
307 pub unsafe fn kind(&self) -> c_uint {
308 PyASCIIObjectState::from(self.state).kind()
309 }
310
311 #[cfg_attr(not(Py_3_12), doc = "[`PyUnicode_WCHAR_KIND`], ")]
315 #[inline]
317 pub unsafe fn set_kind(&mut self, val: c_uint) {
318 let mut state = PyASCIIObjectState::from(self.state);
319 state.set_kind(val);
320 self.state = u32::from(state);
321 }
322
323 #[inline]
327 pub unsafe fn compact(&self) -> c_uint {
328 PyASCIIObjectState::from(self.state).compact()
329 }
330
331 #[inline]
335 pub unsafe fn set_compact(&mut self, val: c_uint) {
336 let mut state = PyASCIIObjectState::from(self.state);
337 state.set_compact(val);
338 self.state = u32::from(state);
339 }
340
341 #[inline]
345 pub unsafe fn ascii(&self) -> c_uint {
346 PyASCIIObjectState::from(self.state).ascii()
347 }
348
349 #[inline]
353 pub unsafe fn set_ascii(&mut self, val: c_uint) {
354 let mut state = PyASCIIObjectState::from(self.state);
355 state.set_ascii(val);
356 self.state = u32::from(state);
357 }
358
359 #[cfg(not(Py_3_12))]
363 #[inline]
364 pub unsafe fn ready(&self) -> c_uint {
365 PyASCIIObjectState::from(self.state).ready()
366 }
367
368 #[cfg(not(Py_3_12))]
372 #[inline]
373 pub unsafe fn set_ready(&mut self, val: c_uint) {
374 let mut state = PyASCIIObjectState::from(self.state);
375 state.set_ready(val);
376 self.state = u32::from(state);
377 }
378}
379
380#[repr(C)]
381pub struct PyCompactUnicodeObject {
382 pub _base: PyASCIIObject,
383 #[cfg(not(GraalPy))]
384 pub utf8_length: Py_ssize_t,
385 #[cfg(not(GraalPy))]
386 pub utf8: *mut c_char,
387 #[cfg(not(any(Py_3_12, GraalPy)))]
388 pub wstr_length: Py_ssize_t,
389}
390
391#[repr(C)]
392pub union PyUnicodeObjectData {
393 pub any: *mut c_void,
394 pub latin1: *mut Py_UCS1,
395 pub ucs2: *mut Py_UCS2,
396 pub ucs4: *mut Py_UCS4,
397}
398
399#[repr(C)]
400pub struct PyUnicodeObject {
401 pub _base: PyCompactUnicodeObject,
402 #[cfg(not(GraalPy))]
403 pub data: PyUnicodeObjectData,
404}
405
406extern "C" {
407 #[cfg(not(any(PyPy, GraalPy)))]
408 pub fn _PyUnicode_CheckConsistency(op: *mut PyObject, check_content: c_int) -> c_int;
409}
410
// Values for the 2-bit `interned` field of the unicode `state` bitfield.

/// `interned` value 0.
pub const SSTATE_NOT_INTERNED: c_uint = 0;
/// `interned` value 1.
pub const SSTATE_INTERNED_MORTAL: c_uint = 1;
/// `interned` value 2.
pub const SSTATE_INTERNED_IMMORTAL: c_uint = 2;
/// `interned` value 3; only defined for Python 3.12 and later.
#[cfg(Py_3_12)]
pub const SSTATE_INTERNED_IMMORTAL_STATIC: c_uint = 3;
421
422#[cfg(not(GraalPy))]
423#[inline]
424pub unsafe fn PyUnicode_IS_ASCII(op: *mut PyObject) -> c_uint {
425 debug_assert!(crate::PyUnicode_Check(op) != 0);
426 #[cfg(not(Py_3_12))]
427 debug_assert!(PyUnicode_IS_READY(op) != 0);
428
429 (*(op as *mut PyASCIIObject)).ascii()
430}
431
432#[cfg(not(GraalPy))]
433#[inline]
434pub unsafe fn PyUnicode_IS_COMPACT(op: *mut PyObject) -> c_uint {
435 (*(op as *mut PyASCIIObject)).compact()
436}
437
438#[cfg(not(GraalPy))]
439#[inline]
440pub unsafe fn PyUnicode_IS_COMPACT_ASCII(op: *mut PyObject) -> c_uint {
441 ((*(op as *mut PyASCIIObject)).ascii() != 0 && PyUnicode_IS_COMPACT(op) != 0).into()
442}
443
// Values for the 3-bit `kind` field of the unicode `state` bitfield.

/// `kind` value 0; only defined before Python 3.12.
#[cfg(not(Py_3_12))]
#[deprecated(note = "Removed in Python 3.12")]
pub const PyUnicode_WCHAR_KIND: c_uint = 0;

/// `kind` value 1 (see `PyUnicode_1BYTE_DATA` for the matching data accessor).
pub const PyUnicode_1BYTE_KIND: c_uint = 1;
/// `kind` value 2 (see `PyUnicode_2BYTE_DATA` for the matching data accessor).
pub const PyUnicode_2BYTE_KIND: c_uint = 2;
/// `kind` value 4 (see `PyUnicode_4BYTE_DATA` for the matching data accessor).
pub const PyUnicode_4BYTE_KIND: c_uint = 4;
451
452#[cfg(not(any(GraalPy, PyPy)))]
453#[inline]
454pub unsafe fn PyUnicode_1BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS1 {
455 PyUnicode_DATA(op) as *mut Py_UCS1
456}
457
458#[cfg(not(any(GraalPy, PyPy)))]
459#[inline]
460pub unsafe fn PyUnicode_2BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS2 {
461 PyUnicode_DATA(op) as *mut Py_UCS2
462}
463
464#[cfg(not(any(GraalPy, PyPy)))]
465#[inline]
466pub unsafe fn PyUnicode_4BYTE_DATA(op: *mut PyObject) -> *mut Py_UCS4 {
467 PyUnicode_DATA(op) as *mut Py_UCS4
468}
469
470#[cfg(not(GraalPy))]
471#[inline]
472pub unsafe fn PyUnicode_KIND(op: *mut PyObject) -> c_uint {
473 debug_assert!(crate::PyUnicode_Check(op) != 0);
474 #[cfg(not(Py_3_12))]
475 debug_assert!(PyUnicode_IS_READY(op) != 0);
476
477 (*(op as *mut PyASCIIObject)).kind()
478}
479
480#[cfg(not(GraalPy))]
481#[inline]
482pub unsafe fn _PyUnicode_COMPACT_DATA(op: *mut PyObject) -> *mut c_void {
483 if PyUnicode_IS_ASCII(op) != 0 {
484 (op as *mut PyASCIIObject).offset(1) as *mut c_void
485 } else {
486 (op as *mut PyCompactUnicodeObject).offset(1) as *mut c_void
487 }
488}
489
490#[cfg(not(any(GraalPy, PyPy)))]
491#[inline]
492pub unsafe fn _PyUnicode_NONCOMPACT_DATA(op: *mut PyObject) -> *mut c_void {
493 debug_assert!(!(*(op as *mut PyUnicodeObject)).data.any.is_null());
494
495 (*(op as *mut PyUnicodeObject)).data.any
496}
497
498#[cfg(not(any(GraalPy, PyPy)))]
499#[inline]
500pub unsafe fn PyUnicode_DATA(op: *mut PyObject) -> *mut c_void {
501 debug_assert!(crate::PyUnicode_Check(op) != 0);
502
503 if PyUnicode_IS_COMPACT(op) != 0 {
504 _PyUnicode_COMPACT_DATA(op)
505 } else {
506 _PyUnicode_NONCOMPACT_DATA(op)
507 }
508}
509
510#[cfg(not(GraalPy))]
515#[inline]
516pub unsafe fn PyUnicode_GET_LENGTH(op: *mut PyObject) -> Py_ssize_t {
517 debug_assert!(crate::PyUnicode_Check(op) != 0);
518 #[cfg(not(Py_3_12))]
519 debug_assert!(PyUnicode_IS_READY(op) != 0);
520
521 (*(op as *mut PyASCIIObject)).length
522}
523
/// Python 3.12+ / GraalPy variant: always reports the string as ready.
///
/// The argument is ignored and never dereferenced; the function unconditionally
/// returns 1.
#[cfg(any(Py_3_12, GraalPy))]
#[inline]
pub unsafe fn PyUnicode_IS_READY(_op: *mut PyObject) -> c_uint {
    1
}
530
531#[cfg(not(any(GraalPy, Py_3_12)))]
532#[inline]
533pub unsafe fn PyUnicode_IS_READY(op: *mut PyObject) -> c_uint {
534 (*(op as *mut PyASCIIObject)).ready()
535}
536
/// Python 3.12+ / GraalPy variant: a no-op that always returns 0.
///
/// The argument is ignored and never dereferenced.
#[cfg(any(Py_3_12, GraalPy))]
#[inline]
pub unsafe fn PyUnicode_READY(_op: *mut PyObject) -> c_int {
    0
}
542
543#[cfg(not(any(Py_3_12, GraalPy)))]
544#[inline]
545pub unsafe fn PyUnicode_READY(op: *mut PyObject) -> c_int {
546 debug_assert!(crate::PyUnicode_Check(op) != 0);
547
548 if PyUnicode_IS_READY(op) != 0 {
549 0
550 } else {
551 _PyUnicode_Ready(op)
552 }
553}
554
555extern "C" {
560 #[cfg_attr(PyPy, link_name = "PyPyUnicode_New")]
561 pub fn PyUnicode_New(size: Py_ssize_t, maxchar: Py_UCS4) -> *mut PyObject;
562 #[cfg_attr(PyPy, link_name = "_PyPyUnicode_Ready")]
563 pub fn _PyUnicode_Ready(unicode: *mut PyObject) -> c_int;
564
565 #[cfg(not(PyPy))]
568 pub fn PyUnicode_CopyCharacters(
569 to: *mut PyObject,
570 to_start: Py_ssize_t,
571 from: *mut PyObject,
572 from_start: Py_ssize_t,
573 how_many: Py_ssize_t,
574 ) -> Py_ssize_t;
575
576 #[cfg(not(PyPy))]
579 pub fn PyUnicode_Fill(
580 unicode: *mut PyObject,
581 start: Py_ssize_t,
582 length: Py_ssize_t,
583 fill_char: Py_UCS4,
584 ) -> Py_ssize_t;
585
586 #[cfg(not(Py_3_12))]
589 #[deprecated]
590 #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromUnicode")]
591 pub fn PyUnicode_FromUnicode(u: *const Py_UNICODE, size: Py_ssize_t) -> *mut PyObject;
592
593 #[cfg_attr(PyPy, link_name = "PyPyUnicode_FromKindAndData")]
594 pub fn PyUnicode_FromKindAndData(
595 kind: c_int,
596 buffer: *const c_void,
597 size: Py_ssize_t,
598 ) -> *mut PyObject;
599
600 #[cfg(not(Py_3_12))]
604 #[deprecated]
605 #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicode")]
606 pub fn PyUnicode_AsUnicode(unicode: *mut PyObject) -> *mut Py_UNICODE;
607
608 #[cfg(not(Py_3_12))]
611 #[deprecated]
612 #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUnicodeAndSize")]
613 pub fn PyUnicode_AsUnicodeAndSize(
614 unicode: *mut PyObject,
615 size: *mut Py_ssize_t,
616 ) -> *mut Py_UNICODE;
617
618 }
620
621extern "C" {
637 #[cfg_attr(PyPy, link_name = "PyPyUnicode_AsUTF8")]
640 pub fn PyUnicode_AsUTF8(unicode: *mut PyObject) -> *const c_char;
641
642 pub fn PyUnicode_Encode(
645 s: *const Py_UNICODE,
646 size: Py_ssize_t,
647 encoding: *const c_char,
648 errors: *const c_char,
649 ) -> *mut PyObject;
650
651 pub fn PyUnicode_EncodeUTF7(
652 data: *const Py_UNICODE,
653 length: Py_ssize_t,
654 base64SetO: c_int,
655 base64WhiteSpace: c_int,
656 errors: *const c_char,
657 ) -> *mut PyObject;
658
659 #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeUTF8")]
663 pub fn PyUnicode_EncodeUTF8(
664 data: *const Py_UNICODE,
665 length: Py_ssize_t,
666 errors: *const c_char,
667 ) -> *mut PyObject;
668
669 pub fn PyUnicode_EncodeUTF32(
670 data: *const Py_UNICODE,
671 length: Py_ssize_t,
672 errors: *const c_char,
673 byteorder: c_int,
674 ) -> *mut PyObject;
675
676 pub fn PyUnicode_EncodeUTF16(
679 data: *const Py_UNICODE,
680 length: Py_ssize_t,
681 errors: *const c_char,
682 byteorder: c_int,
683 ) -> *mut PyObject;
684
685 pub fn PyUnicode_EncodeUnicodeEscape(
689 data: *const Py_UNICODE,
690 length: Py_ssize_t,
691 ) -> *mut PyObject;
692
693 pub fn PyUnicode_EncodeRawUnicodeEscape(
694 data: *const Py_UNICODE,
695 length: Py_ssize_t,
696 ) -> *mut PyObject;
697
698 #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeLatin1")]
701 pub fn PyUnicode_EncodeLatin1(
702 data: *const Py_UNICODE,
703 length: Py_ssize_t,
704 errors: *const c_char,
705 ) -> *mut PyObject;
706
707 #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeASCII")]
710 pub fn PyUnicode_EncodeASCII(
711 data: *const Py_UNICODE,
712 length: Py_ssize_t,
713 errors: *const c_char,
714 ) -> *mut PyObject;
715
716 pub fn PyUnicode_EncodeCharmap(
717 data: *const Py_UNICODE,
718 length: Py_ssize_t,
719 mapping: *mut PyObject,
720 errors: *const c_char,
721 ) -> *mut PyObject;
722
723 pub fn PyUnicode_TranslateCharmap(
726 data: *const Py_UNICODE,
727 length: Py_ssize_t,
728 table: *mut PyObject,
729 errors: *const c_char,
730 ) -> *mut PyObject;
731
732 #[cfg_attr(PyPy, link_name = "PyPyUnicode_EncodeDecimal")]
735 pub fn PyUnicode_EncodeDecimal(
736 s: *mut Py_UNICODE,
737 length: Py_ssize_t,
738 output: *mut c_char,
739 errors: *const c_char,
740 ) -> c_int;
741
742 #[cfg_attr(PyPy, link_name = "PyPyUnicode_TransformDecimalToASCII")]
743 pub fn PyUnicode_TransformDecimalToASCII(
744 s: *mut Py_UNICODE,
745 length: Py_ssize_t,
746 ) -> *mut PyObject;
747
748 }
750
751