use rustpython_wtf8::Wtf8;

/// A lightweight position marker used together with a [`StrDrive`]
/// implementation.
///
/// The cursor holds a raw pointer and therefore carries no lifetime: it is
/// only meaningful while the string it was created from is still alive and
/// unmodified.
#[derive(Debug, Clone, Copy)]
pub struct StringCursor {
    /// Raw pointer to the byte at the cursor; null for a cursor produced by
    /// `Default`.
    pub(crate) ptr: *const u8,
    /// Logical index of the cursor, counted in the driver's units (bytes for
    /// `&[u8]`, code points for `&str` and `&Wtf8`).
    pub position: usize,
}

impl Default for StringCursor {
    /// Builds a detached cursor: a null pointer at position zero.
    fn default() -> Self {
        let ptr = std::ptr::null::<u8>();
        Self { ptr, position: 0 }
    }
}

/// Abstraction over string-like inputs that can be walked with a
/// [`StringCursor`].
///
/// A "unit" is one byte for `&[u8]` and one code point for `&str` / `&Wtf8`.
/// The cursor-only functions (`advance`, `peek`, `skip`, `back_*`) take no
/// reference to the string and perform no bounds checks, so the caller is
/// responsible for keeping the cursor inside the string it was created from.
pub trait StrDrive: Copy {
    /// Returns the length of the string in units.
    fn count(&self) -> usize;
    /// Creates a cursor positioned at unit index `n`.
    fn create_cursor(&self, n: usize) -> StringCursor;
    /// Moves an existing cursor so that it sits at unit index `n`.
    fn adjust_cursor(&self, cursor: &mut StringCursor, n: usize);
    /// Moves the cursor forward by one unit and returns a unit value as `u32`.
    /// The `&str` and `&Wtf8` implementations return the code point that was
    /// stepped over.
    fn advance(cursor: &mut StringCursor) -> u32;
    /// Returns the unit at the cursor without moving it.
    fn peek(cursor: &StringCursor) -> u32;
    /// Moves the cursor forward by `n` units.
    fn skip(cursor: &mut StringCursor, n: usize);
    /// Moves the cursor back by one unit and returns the unit stepped over.
    fn back_advance(cursor: &mut StringCursor) -> u32;
    /// Returns the unit immediately before the cursor without moving it.
    fn back_peek(cursor: &StringCursor) -> u32;
    /// Moves the cursor back by `n` units.
    fn back_skip(cursor: &mut StringCursor, n: usize);
}

/// Byte-string driver: each unit is one byte, so positions are byte offsets
/// and every cursor move is plain pointer arithmetic.
impl StrDrive for &[u8] {
    /// Returns the number of bytes in the slice.
    #[inline]
    fn count(&self) -> usize {
        self.len()
    }

    /// Creates a cursor at byte offset `n`.
    ///
    /// # Panics
    ///
    /// Panics if `n` is greater than the slice length.
    #[inline]
    fn create_cursor(&self, n: usize) -> StringCursor {
        StringCursor {
            ptr: self[n..].as_ptr(),
            position: n,
        }
    }

    /// Repositions `cursor` at byte offset `n`, ignoring its previous state.
    ///
    /// # Panics
    ///
    /// Panics if `n` is greater than the slice length.
    #[inline]
    fn adjust_cursor(&self, cursor: &mut StringCursor, n: usize) {
        cursor.position = n;
        cursor.ptr = self[n..].as_ptr();
    }

    /// Steps over the byte at the cursor and returns it.
    ///
    /// The byte is read *before* the pointer moves. This matches the `&str`
    /// and `&Wtf8` drivers (which return the code point they consumed) and
    /// avoids dereferencing the one-past-the-end pointer when the cursor
    /// steps off the last byte.
    #[inline]
    fn advance(cursor: &mut StringCursor) -> u32 {
        // SAFETY: the caller must only advance a cursor that currently points
        // at a byte inside the slice it was created from.
        let byte = unsafe { *cursor.ptr };
        cursor.position += 1;
        // SAFETY: stepping from an in-bounds byte lands at most one past the
        // end of the slice, which is a permitted pointer value.
        cursor.ptr = unsafe { cursor.ptr.add(1) };
        u32::from(byte)
    }

    /// Returns the byte at the cursor without moving it.
    #[inline]
    fn peek(cursor: &StringCursor) -> u32 {
        // SAFETY: the caller must ensure the cursor points at a byte inside
        // the slice (i.e. it is not at the end).
        unsafe { u32::from(*cursor.ptr) }
    }

    /// Moves the cursor forward by `n` bytes.
    #[inline]
    fn skip(cursor: &mut StringCursor, n: usize) {
        cursor.position += n;
        // SAFETY: the caller must ensure the new position does not go past
        // the end of the slice.
        unsafe { cursor.ptr = cursor.ptr.add(n) };
    }

    /// Moves the cursor back by one byte and returns that byte.
    #[inline]
    fn back_advance(cursor: &mut StringCursor) -> u32 {
        cursor.position -= 1;
        // SAFETY: the caller must ensure the cursor is not at the start of
        // the slice, so the previous byte exists and is readable.
        unsafe {
            cursor.ptr = cursor.ptr.sub(1);
            u32::from(*cursor.ptr)
        }
    }

    /// Returns the byte immediately before the cursor without moving it.
    #[inline]
    fn back_peek(cursor: &StringCursor) -> u32 {
        // SAFETY: the caller must ensure the cursor is not at the start of
        // the slice.
        unsafe { u32::from(*cursor.ptr.sub(1)) }
    }

    /// Moves the cursor back by `n` bytes.
    #[inline]
    fn back_skip(cursor: &mut StringCursor, n: usize) {
        cursor.position -= n;
        // SAFETY: the caller must ensure at least `n` bytes precede the
        // cursor inside the slice.
        unsafe { cursor.ptr = cursor.ptr.sub(n) };
    }
}

/// UTF-8 driver: units are code points, so positions count code points and
/// every cursor move decodes the bytes it passes over.
impl StrDrive for &str {
    /// Returns the number of code points; this walks the whole string.
    #[inline]
    fn count(&self) -> usize {
        self.chars().count()
    }

    /// Creates a cursor at code point index `n` by decoding forward from the
    /// start of the string. No bounds check is performed on `n`.
    #[inline]
    fn create_cursor(&self, n: usize) -> StringCursor {
        let mut from_start = StringCursor {
            position: 0,
            ptr: self.as_ptr(),
        };
        Self::skip(&mut from_start, n);
        from_start
    }

    /// Moves `cursor` to code point index `n`, walking forward from its
    /// current position when possible.
    #[inline]
    fn adjust_cursor(&self, cursor: &mut StringCursor, n: usize) {
        // A null (default) cursor, or one already beyond `n`, cannot be
        // walked forward to the target; rebuild it from the beginning.
        let must_restart = cursor.ptr.is_null() || cursor.position > n;
        if must_restart {
            *cursor = Self::create_cursor(self, n);
            return;
        }
        let remaining = n - cursor.position;
        if remaining > 0 {
            Self::skip(cursor, remaining);
        }
    }

    /// Decodes the code point at the cursor, steps past it and returns it.
    #[inline]
    fn advance(cursor: &mut StringCursor) -> u32 {
        cursor.position += 1;
        let ptr = &mut cursor.ptr;
        // SAFETY: relies on the caller keeping the cursor on a code point
        // boundary inside the string, before its end.
        unsafe { next_code_point(ptr) }
    }

    /// Decodes the code point at the cursor without moving the cursor.
    #[inline]
    fn peek(cursor: &StringCursor) -> u32 {
        // Decode through a scratch copy so the cursor itself is untouched.
        let mut scratch = cursor.ptr;
        // SAFETY: same requirement as `advance`.
        unsafe { next_code_point(&mut scratch) }
    }

    /// Moves the cursor forward by `n` code points.
    #[inline]
    fn skip(cursor: &mut StringCursor, n: usize) {
        cursor.position += n;
        let mut remaining = n;
        while remaining > 0 {
            // SAFETY: relies on the caller ensuring at least `n` code points
            // follow the cursor.
            unsafe { next_code_point(&mut cursor.ptr) };
            remaining -= 1;
        }
    }

    /// Steps back over the code point before the cursor and returns it.
    #[inline]
    fn back_advance(cursor: &mut StringCursor) -> u32 {
        cursor.position -= 1;
        let ptr = &mut cursor.ptr;
        // SAFETY: relies on the caller ensuring a complete code point
        // precedes the cursor inside the string.
        unsafe { next_code_point_reverse(ptr) }
    }

    /// Decodes the code point before the cursor without moving the cursor.
    #[inline]
    fn back_peek(cursor: &StringCursor) -> u32 {
        let mut scratch = cursor.ptr;
        // SAFETY: same requirement as `back_advance`.
        unsafe { next_code_point_reverse(&mut scratch) }
    }

    /// Moves the cursor back by `n` code points.
    #[inline]
    fn back_skip(cursor: &mut StringCursor, n: usize) {
        cursor.position -= n;
        let mut remaining = n;
        while remaining > 0 {
            // SAFETY: relies on the caller ensuring at least `n` code points
            // precede the cursor.
            unsafe { next_code_point_reverse(&mut cursor.ptr) };
            remaining -= 1;
        }
    }
}

/// WTF-8 driver: units are code points, so positions count code points and
/// every cursor move decodes the bytes it passes over.
impl StrDrive for &Wtf8 {
    /// Returns the number of code points; this walks the whole string.
    #[inline]
    fn count(&self) -> usize {
        self.code_points().count()
    }

    /// Creates a cursor at code point index `n` by decoding forward from the
    /// start of the string. No bounds check is performed on `n`.
    #[inline]
    fn create_cursor(&self, n: usize) -> StringCursor {
        let mut from_start = StringCursor {
            position: 0,
            ptr: self.as_bytes().as_ptr(),
        };
        Self::skip(&mut from_start, n);
        from_start
    }

    /// Moves `cursor` to code point index `n`, walking forward from its
    /// current position when possible.
    #[inline]
    fn adjust_cursor(&self, cursor: &mut StringCursor, n: usize) {
        // A null (default) cursor, or one already beyond `n`, cannot be
        // walked forward to the target; rebuild it from the beginning.
        let must_restart = cursor.ptr.is_null() || cursor.position > n;
        if must_restart {
            *cursor = Self::create_cursor(self, n);
            return;
        }
        let remaining = n - cursor.position;
        if remaining > 0 {
            Self::skip(cursor, remaining);
        }
    }

    /// Decodes the code point at the cursor, steps past it and returns it.
    #[inline]
    fn advance(cursor: &mut StringCursor) -> u32 {
        cursor.position += 1;
        let ptr = &mut cursor.ptr;
        // SAFETY: relies on the caller keeping the cursor on a code point
        // boundary inside the string, before its end.
        unsafe { next_code_point(ptr) }
    }

    /// Decodes the code point at the cursor without moving the cursor.
    #[inline]
    fn peek(cursor: &StringCursor) -> u32 {
        // Decode through a scratch copy so the cursor itself is untouched.
        let mut scratch = cursor.ptr;
        // SAFETY: same requirement as `advance`.
        unsafe { next_code_point(&mut scratch) }
    }

    /// Moves the cursor forward by `n` code points.
    #[inline]
    fn skip(cursor: &mut StringCursor, n: usize) {
        cursor.position += n;
        let mut remaining = n;
        while remaining > 0 {
            // SAFETY: relies on the caller ensuring at least `n` code points
            // follow the cursor.
            unsafe { next_code_point(&mut cursor.ptr) };
            remaining -= 1;
        }
    }

    /// Steps back over the code point before the cursor and returns it.
    #[inline]
    fn back_advance(cursor: &mut StringCursor) -> u32 {
        cursor.position -= 1;
        let ptr = &mut cursor.ptr;
        // SAFETY: relies on the caller ensuring a complete code point
        // precedes the cursor inside the string.
        unsafe { next_code_point_reverse(ptr) }
    }

    /// Decodes the code point before the cursor without moving the cursor.
    #[inline]
    fn back_peek(cursor: &StringCursor) -> u32 {
        let mut scratch = cursor.ptr;
        // SAFETY: same requirement as `back_advance`.
        unsafe { next_code_point_reverse(&mut scratch) }
    }

    /// Moves the cursor back by `n` code points.
    #[inline]
    fn back_skip(cursor: &mut StringCursor, n: usize) {
        cursor.position -= n;
        let mut remaining = n;
        while remaining > 0 {
            // SAFETY: relies on the caller ensuring at least `n` code points
            // precede the cursor.
            unsafe { next_code_point_reverse(&mut cursor.ptr) };
            remaining -= 1;
        }
    }
}

/// Decodes the code point that starts at `*ptr` (assuming a UTF-8-like
/// encoding) and advances `*ptr` past the bytes it consumed.
///
/// A lead byte below 128 is returned as-is after consuming one byte; larger
/// lead bytes consume two, three or four bytes in total.
///
/// # Safety
///
/// `*ptr` must point at the first byte of a code point inside a valid
/// UTF-8-like (UTF-8 or WTF-8) string, and every byte of that sequence must
/// be readable. No validation or bounds checking is performed.
#[inline]
const unsafe fn next_code_point(ptr: &mut *const u8) -> u32 {
    // Decode UTF-8: read the lead byte and step past it.
    let x = unsafe { **ptr };
    *ptr = unsafe { ptr.offset(1) };

    // Single-byte (ASCII) fast path.
    if x < 128 {
        return x as u32;
    }

    // Multibyte case follows
    // Decode from a byte combination out of: [[[x y] z] w]
    // NOTE: Performance is sensitive to the exact formulation here
    let init = utf8_first_byte(x, 2);
    // SAFETY: the caller guarantees a well-formed sequence, so a non-ASCII
    // lead byte is followed by at least one continuation byte.
    let y = unsafe { **ptr };
    *ptr = unsafe { ptr.offset(1) };
    let mut ch = utf8_acc_cont_byte(init, y);
    if x >= 0xE0 {
        // [[x y z] w] case
        // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
        // SAFETY: a lead byte >= 0xE0 in a well-formed sequence is followed
        // by at least two continuation bytes.
        let z = unsafe { **ptr };
        *ptr = unsafe { ptr.offset(1) };
        let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
        ch = (init << 12) | y_z;
        if x >= 0xF0 {
            // [x y z w] case
            // use only the lower 3 bits of `init`
            // SAFETY: a lead byte >= 0xF0 in a well-formed sequence is
            // followed by three continuation bytes.
            let w = unsafe { **ptr };
            *ptr = unsafe { ptr.offset(1) };
            ch = ((init & 7) << 18) | utf8_acc_cont_byte(y_z, w);
        }
    }

    ch
}

/// Decodes the code point that ends immediately before `*ptr` (assuming a
/// UTF-8-like encoding) and moves `*ptr` back to that code point's first
/// byte.
///
/// # Safety
///
/// `*ptr` must point just past a complete code point inside a valid
/// UTF-8-like (UTF-8 or WTF-8) string, and every byte of that sequence must
/// be readable. No validation or bounds checking is performed.
#[inline]
const unsafe fn next_code_point_reverse(ptr: &mut *const u8) -> u32 {
    // Decode UTF-8: step back onto the final byte of the sequence.
    *ptr = unsafe { ptr.offset(-1) };
    let w = match unsafe { **ptr } {
        // Single-byte (ASCII) fast path.
        next_byte if next_byte < 128 => return next_byte as u32,
        back_byte => back_byte,
    };

    // Multibyte case follows
    // Decode from a byte combination out of: [x [y [z w]]]
    let mut ch;
    // SAFETY: the caller guarantees a well-formed sequence, so a non-ASCII
    // trailing byte is preceded by at least one more byte of the sequence.
    *ptr = unsafe { ptr.offset(-1) };
    let z = unsafe { **ptr };
    // Tentatively treat `z` as the lead byte of a two-byte sequence.
    ch = utf8_first_byte(z, 2);
    if utf8_is_cont_byte(z) {
        // SAFETY: `z` is a continuation byte, so the sequence has at least
        // one more byte before it.
        *ptr = unsafe { ptr.offset(-1) };
        let y = unsafe { **ptr };
        // Tentatively treat `y` as the lead byte of a three-byte sequence.
        ch = utf8_first_byte(y, 3);
        if utf8_is_cont_byte(y) {
            // SAFETY: `y` is a continuation byte too, so the lead byte of a
            // four-byte sequence precedes it.
            *ptr = unsafe { ptr.offset(-1) };
            let x = unsafe { **ptr };
            ch = utf8_first_byte(x, 4);
            ch = utf8_acc_cont_byte(ch, y);
        }
        ch = utf8_acc_cont_byte(ch, z);
    }
    ch = utf8_acc_cont_byte(ch, w);

    ch
}

/// Extracts the payload bits of a multi-byte sequence's lead byte, giving the
/// starting value of the code point accumulator.
///
/// `width` is the total length of the sequence: the low 5 bits are kept for
/// width 2, the low 4 bits for width 3 and the low 3 bits for width 4.
#[inline]
const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    let payload_mask: u8 = 0x7F >> width;
    let payload = byte & payload_mask;
    payload as u32
}

/// Folds one continuation byte into the accumulator: `ch` is shifted left by
/// six bits and the byte's six payload bits are placed in the gap.
#[inline]
const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
    let payload = (byte & CONT_MASK) as u32;
    let shifted = ch << 6;
    shifted | payload
}

/// Reports whether `byte` is a UTF-8 continuation byte, i.e. its two top bits
/// are `10` (the range `0x80..=0xBF`).
#[inline]
const fn utf8_is_cont_byte(byte: u8) -> bool {
    const TOP_TWO_BITS: u8 = 0b1100_0000;
    const CONT_TAG: u8 = 0b1000_0000;
    byte & TOP_TWO_BITS == CONT_TAG
}

/// Mask selecting the six payload (value) bits of a continuation byte
/// (`10xxxxxx`), discarding the two tag bits.
const CONT_MASK: u8 = 0b0011_1111;

/// Reports whether `b` is one of the six ASCII whitespace bytes recognised
/// here: tab, line feed, vertical tab, form feed, carriage return or space.
const fn is_py_ascii_whitespace(b: u8) -> bool {
    // 0x09..=0x0D is the contiguous run \t, \n, \x0B, \x0C, \r.
    matches!(b, 0x09..=0x0D | b' ')
}

/// Reports whether `ch` is an ASCII word character: a letter, a digit or an
/// underscore. Values that do not fit in a byte are never word characters.
#[inline]
pub(crate) fn is_word(ch: u32) -> bool {
    match u8::try_from(ch) {
        Ok(byte) => byte == b'_' || byte.is_ascii_alphanumeric(),
        Err(_) => false,
    }
}
#[inline]
pub(crate) fn is_space(ch: u32) -> bool {
    u8::try_from(ch)
        .map(is_py_ascii_whitespace)
        .unwrap_or(false)
}
/// Reports whether `ch` is an ASCII decimal digit (`'0'..='9'`).
#[inline]
pub(crate) fn is_digit(ch: u32) -> bool {
    match u8::try_from(ch) {
        Ok(byte) => byte.is_ascii_digit(),
        Err(_) => false,
    }
}
/// Alphanumeric test for locale mode. The locale is currently not consulted:
/// only ASCII letters and digits are accepted.
#[inline]
pub(crate) fn is_loc_alnum(ch: u32) -> bool {
    // FIXME: Ignore the locales
    match u8::try_from(ch) {
        Ok(byte) => byte.is_ascii_alphanumeric(),
        Err(_) => false,
    }
}
/// Word-character test for locale mode: an underscore, or anything accepted
/// by `is_loc_alnum`.
#[inline]
pub(crate) fn is_loc_word(ch: u32) -> bool {
    if ch == u32::from(b'_') {
        return true;
    }
    is_loc_alnum(ch)
}
/// Reports whether `ch` is the line feed character (`'\n'`, U+000A).
#[inline]
pub(crate) const fn is_linebreak(ch: u32) -> bool {
    matches!(ch, 0x0A)
}
/// Maps ASCII upper-case letters to lower case; every other value, including
/// anything that does not fit in a byte, is returned unchanged.
#[inline]
pub fn lower_ascii(ch: u32) -> u32 {
    match u8::try_from(ch) {
        Ok(byte) => u32::from(byte.to_ascii_lowercase()),
        Err(_) => ch,
    }
}
/// Lower-casing hook for locale mode.
///
/// The locale is not consulted yet (see the FIXME below): the call simply
/// forwards to `lower_ascii`.
#[inline]
pub(crate) fn lower_locate(ch: u32) -> u32 {
    // FIXME: Ignore the locales
    lower_ascii(ch)
}
/// Upper-casing hook for locale mode. The locale is currently not consulted:
/// ASCII lower-case letters are mapped to upper case and every other value is
/// returned unchanged.
#[inline]
pub(crate) fn upper_locate(ch: u32) -> u32 {
    // FIXME: Ignore the locales
    match u8::try_from(ch) {
        Ok(byte) => u32::from(byte.to_ascii_uppercase()),
        Err(_) => ch,
    }
}
/// Digit test for Unicode mode.
///
/// As written this accepts only ASCII digits; values that are not valid
/// `char`s are rejected.
#[inline]
pub(crate) fn is_uni_digit(ch: u32) -> bool {
    // TODO: check with cpython
    match char::try_from(ch) {
        Ok(c) => c.is_ascii_digit(),
        Err(_) => false,
    }
}
/// Whitespace test for Unicode mode.
///
/// Accepts the code points listed below. The six ASCII whitespace characters
/// (tab, LF, VT, FF, CR, space) are part of the list, so no separate ASCII
/// check is needed.
#[inline]
pub(crate) fn is_uni_space(ch: u32) -> bool {
    // TODO: check with cpython
    matches!(
        ch,
        0x0009..=0x000D     // tab, LF, VT, FF, CR
            | 0x001C..=0x0020 // FS, GS, RS, US, space
            | 0x0085
            | 0x00A0
            | 0x1680
            | 0x2000..=0x200A
            | 0x2028
            | 0x2029
            | 0x202F
            | 0x205F
            | 0x3000
    )
}
/// Line-break test for Unicode mode: LF, VT, FF, CR, the separators
/// U+001C..=U+001E, NEL (U+0085), and U+2028 / U+2029.
#[inline]
pub(crate) const fn is_uni_linebreak(ch: u32) -> bool {
    matches!(
        ch,
        0x000A..=0x000D | 0x001C..=0x001E | 0x0085 | 0x2028 | 0x2029
    )
}
/// Alphanumeric test for Unicode mode, based on `char::is_alphanumeric`.
/// Values that are not valid `char`s are rejected.
#[inline]
pub(crate) fn is_uni_alnum(ch: u32) -> bool {
    // TODO: check with cpython
    match char::try_from(ch) {
        Ok(c) => c.is_alphanumeric(),
        Err(_) => false,
    }
}
/// Word-character test for Unicode mode: an underscore, or anything accepted
/// by `is_uni_alnum`.
#[inline]
pub(crate) fn is_uni_word(ch: u32) -> bool {
    if ch == u32::from(b'_') {
        return true;
    }
    is_uni_alnum(ch)
}
/// Lower-cases `ch` using `char::to_lowercase`, keeping only the first code
/// point of the mapping. Values that are not valid `char`s are returned
/// unchanged.
#[inline]
pub fn lower_unicode(ch: u32) -> u32 {
    // TODO: check with cpython
    match char::try_from(ch) {
        Ok(c) => {
            let first = c.to_lowercase().next().unwrap();
            u32::from(first)
        }
        Err(_) => ch,
    }
}
/// Upper-cases `ch` using `char::to_uppercase`, keeping only the first code
/// point of the mapping. Values that are not valid `char`s are returned
/// unchanged.
#[inline]
pub fn upper_unicode(ch: u32) -> u32 {
    // TODO: check with cpython
    match char::try_from(ch) {
        Ok(c) => {
            let first = c.to_uppercase().next().unwrap();
            u32::from(first)
        }
        Err(_) => ch,
    }
}