Source code for mmhuman3d.core.conventions.cameras.convert_convention

import warnings
from typing import Iterable, List, Optional, Tuple, Union

import numpy as np
import torch

from mmhuman3d.utils.transforms import ee_to_rotmat, rotmat_to_ee

# Pre-defined camera conventions, keyed by lower-case library name.
#
# Every entry holds four fields:
#   'axis'              - axis sign/order string, parsed by
#                         `enc_camera_convention` into per-axis signs and
#                         an axis order.
#   'left_mm_extrinsic' - flag read by `convert_cameras` to decide whether
#                         the rotation has to be transposed when converting
#                         between left and right matrix multiplication.
#   'view_to_world'     - flag read by `convert_cameras`; when True the
#                         extrinsic is inverted via `convert_world_view`.
#   'left_mm_intrinsic' - flag read by `convert_cameras` to decide whether
#                         the intrinsic matrix has to be transposed.
#
# Table columns, in order:
#   name, axis, left_mm_extrinsic, view_to_world, left_mm_intrinsic
CAMERA_CONVENTIONS = {
    name: {
        'axis': axis,
        'left_mm_extrinsic': left_mm_extrinsic,
        'view_to_world': view_to_world,
        'left_mm_intrinsic': left_mm_intrinsic,
    }
    for (name, axis, left_mm_extrinsic, view_to_world,
         left_mm_intrinsic) in (
             ('pytorch3d', '-xyz', False, False, True),
             ('pyrender', 'xy-z', True, False, True),
             ('opengl', 'xy-z', True, False, True),
             ('open3d', 'x-yz', False, False, False),
             ('opencv', 'x-yz', True, True, True),
             ('unity', 'xyz', True, False, True),
             ('blender', 'xy-z', True, False, True),
             ('maya', 'xy-z', True, False, True),
         )
}


def enc_camera_convention(convention, camera_conventions=None):
    """Convert a camera convention to per-axis signs and an axis order.

    Args:
        convention (str): either a key of `camera_conventions` (its
            ``'axis'`` entry is then used) or an axis string made only of
            the characters ``x``, ``y``, ``z``, ``+`` and ``-``,
            e.g. ``'x-yz'`` or ``'+x-z-y'``.
        camera_conventions (dict, optional): table of pre-defined
            conventions. Defaults to None, which means the module-level
            CAMERA_CONVENTIONS.

    Raises:
        AssertionError: `convention` is not a known name and contains
            characters other than ``x``, ``y``, ``z``, ``+``, ``-``.

    Returns:
        Tuple[List[int], str]: ``sign`` holds ``1`` or ``-1`` for the
            first, second and third axis found in the string (``-1`` when
            the axis letter is directly preceded by ``-``); ``axis_order``
            is the axis letters in the order they appear, e.g. ``'xyz'``.
    """
    # Resolve the default lazily so the table is looked up at call time.
    if camera_conventions is None:
        camera_conventions = CAMERA_CONVENTIONS

    if convention in camera_conventions:
        convention = camera_conventions[convention]['axis']
    else:
        # The message must be parenthesised: without it only the first
        # string literal belongs to the assert statement.
        assert set(convention).issubset({'x', 'y', 'z', '+', '-'}), (
            f'Wrong convention string {convention!r}, choose either in '
            f'{list(camera_conventions)} or define by xyz.')

    sign = [1, 1, 1]
    axis_order = ''
    previous = ''
    for char in convention:
        if char in 'xyz':
            # len(axis_order) is the index of the axis being added.
            if previous == '-':
                sign[len(axis_order)] *= -1
            axis_order += char
        previous = char
    return sign, axis_order


def convert_cameras(
    K: Optional[Union[torch.Tensor, np.ndarray]] = None,
    R: Optional[Union[torch.Tensor, np.ndarray]] = None,
    T: Optional[Union[torch.Tensor, np.ndarray]] = None,
    is_perspective: bool = True,
    convention_src: str = 'opencv',
    convention_dst: str = 'pytorch3d',
    in_ndc_src: bool = True,
    in_ndc_dst: bool = True,
    resolution_src: Optional[Union[int, Tuple[int, int], torch.Tensor,
                                   np.ndarray]] = None,
    resolution_dst: Optional[Union[int, Tuple[int, int], torch.Tensor,
                                   np.ndarray]] = None,
    camera_conventions: dict = CAMERA_CONVENTIONS,
) -> Tuple[Union[torch.Tensor, np.ndarray], Union[torch.Tensor, np.ndarray],
           Union[torch.Tensor, np.ndarray]]:
    """Convert the intrinsic matrix K and extrinsic matrix [R|T] from source
    convention to destination convention.

    Args:
        K (Union[torch.Tensor, np.ndarray]): Intrinsic matrix,
            shape should be (batch_size, 4, 4) or (batch_size, 3, 3).
            Will be ignored if None.
        R (Optional[Union[torch.Tensor, np.ndarray]], optional):
            Extrinsic rotation matrix. Shape should be (batch_size, 3, 3).
            Will be identity if None.
            Defaults to None.
        T (Optional[Union[torch.Tensor, np.ndarray]], optional):
            Extrinsic translation matrix. Shape should be (batch_size, 3).
            Will be zeros if None.
            Defaults to None.
        is_perspective (bool, optional): whether is perspective projection.
            Defaults to True.

        _____________________________________________________________________
        # Camera dependent args
        convention_src (str, optional): convention of source camera,
        convention_dst (str, optional): convention of destination camera,
            Both names are lower-cased and must be keys of
            `camera_conventions`.

        We define the convention of cameras by the order of right, front and
        up.
        E.g., the first one is pyrender and its convention should be
            '+x+z+y'. '+' could be ignored.
            The second one is opencv and its convention should be '+x-z-y'.
            The third one is pytorch3d and its convention should be '-xzy'.
                    opengl(pyrender)     opencv            pytorch3d
                    y                   z                     y
                    |                  /                      |
                    |                 /                       |
                    |_______x        /________x     x________ |
                    /                |                        /
                   /                 |                       /
                z /                y |                    z /

        NOTE(review): the 'axis' strings stored in CAMERA_CONVENTIONS
        (e.g. 'xy-z' for pyrender, 'x-yz' for opencv) do not literally match
        the example strings above - confirm which notation is authoritative.

        in_ndc_src (bool, optional): Whether is the source camera defined
            in ndc.
            Defaults to True.
        in_ndc_dst (bool, optional): Whether is the destination camera defined
            in ndc.
            Defaults to True.

        in camera_convention, we define these args as:
            1). `left_mm_ex` means extrinsic matrix [`R`| `T`] is left
                matrix multiplication defined.
            2). `left_mm_in` means intrinsic matrix `K` is left matrix
                multiplication defined.
            3) `view_to_world` means extrinsic matrix [`R`| `T`] is defined
                as view to world.

        resolution_src (Optional[Union[int, Tuple[int, int], torch.Tensor,
            np.ndarray]], optional):
            Source camera image size of (height, width).
            Required if defined in screen.
            Will be square if int.
            Shape should be (2,) if `array` or `tensor`.
            Defaults to None.
        resolution_dst (Optional[Union[int, Tuple[int, int], torch.Tensor,
            np.ndarray]], optional):
            Destination camera image size of (height, width).
            Required if defined in screen.
            Will be square if int.
            Shape should be (2,) if `array` or `tensor`.
            Defaults to None.
        camera_conventions: (dict, optional): `dict` containing
            pre-defined camera convention information.
            Defaults to CAMERA_CONVENTIONS.

    Raises:
        TypeError: K, R, T should all be `torch.Tensor` or `np.ndarray`,
            and the ones that are given must share the same type.
        AssertionError: a convention name is not in `camera_conventions`,
            or a resolution required for the ndc/screen conversion is
            missing.

    Returns:
        Tuple[Union[torch.Tensor, np.ndarray, None],
            Union[torch.Tensor, np.ndarray],
            Union[torch.Tensor, np.ndarray]]:
            Converted K, R, T. They are `np.ndarray` if the inputs were
            `np.ndarray`, otherwise `torch.Tensor`. K is None if the input
            K was None; when K is given it is returned as (batch, 4, 4) and
            K, R, T with batch size 1 are repeated to the largest batch
            size among them.
    """
    convention_dst = convention_dst.lower()
    convention_src = convention_src.lower()

    # Use the caller-supplied table for every lookup so that a custom
    # `camera_conventions` is honoured consistently.
    assert convention_dst in camera_conventions, \
        f'Unknown convention_dst: {convention_dst}'
    assert convention_src in camera_conventions, \
        f'Unknown convention_src: {convention_src}'

    left_mm_ex_src = camera_conventions[convention_src].get(
        'left_mm_extrinsic', True)
    view_to_world_src = camera_conventions[convention_src].get(
        'view_to_world', False)
    left_mm_in_src = camera_conventions[convention_src].get(
        'left_mm_intrinsic', False)

    left_mm_ex_dst = camera_conventions[convention_dst].get(
        'left_mm_extrinsic', True)
    view_to_world_dst = camera_conventions[convention_dst].get(
        'view_to_world', False)
    left_mm_in_dst = camera_conventions[convention_dst].get(
        'left_mm_intrinsic', False)

    sign_src, axis_src = enc_camera_convention(convention_src,
                                               camera_conventions)
    sign_dst, axis_dst = enc_camera_convention(convention_dst,
                                               camera_conventions)
    # Per-axis factor (+1 / -1) that flips an axis from src to dst.
    sign = torch.Tensor(sign_dst) / torch.Tensor(sign_src)

    # All provided inputs must share one exact type.
    type_ = []
    for x in [K, R, T]:
        if x is not None:
            type_.append(type(x))
    if len(type_) > 0:
        if not all(x == type_[0] for x in type_):
            raise TypeError('Input type should be the same.')

    # Remember to hand numpy back if numpy came in.
    use_numpy = False
    if np.ndarray in type_:
        use_numpy = True

    # convert raw matrix to tensor (always working on a copy)
    if isinstance(K, np.ndarray):
        new_K = torch.Tensor(K)
    elif K is None:
        new_K = None
    elif isinstance(K, torch.Tensor):
        new_K = K.clone()
    else:
        raise TypeError(
            f'K should be `torch.Tensor` or `numpy.ndarray`, type(K): '
            f'{type(K)}')

    if isinstance(R, np.ndarray):
        new_R = torch.Tensor(R).view(-1, 3, 3)
    elif R is None:
        new_R = torch.eye(3, 3)[None]
    elif isinstance(R, torch.Tensor):
        new_R = R.clone().view(-1, 3, 3)
    else:
        raise TypeError(
            f'R should be `torch.Tensor` or `numpy.ndarray`, type(R): '
            f'{type(R)}')

    if isinstance(T, np.ndarray):
        new_T = torch.Tensor(T).view(-1, 3)
    elif T is None:
        new_T = torch.zeros(1, 3)
    elif isinstance(T, torch.Tensor):
        new_T = T.clone().view(-1, 3)
    else:
        raise TypeError(
            f'T should be `torch.Tensor` or `numpy.ndarray`, type(T): '
            f'{type(T)}')

    # Re-express the rotation when the two conventions order axes
    # differently.
    if axis_dst != axis_src:
        new_R = ee_to_rotmat(
            rotmat_to_ee(new_R, convention=axis_src), convention=axis_dst)

    # convert extrinsic to world_to_view
    if view_to_world_src is True:
        new_R, new_T = convert_world_view(new_R, new_T)

    # right mm to left mm
    if (not left_mm_ex_src) and left_mm_ex_dst:
        new_R *= sign.to(new_R.device)
        new_R = new_R.permute(0, 2, 1)
    # left mm to right mm
    elif left_mm_ex_src and (not left_mm_ex_dst):
        new_R = new_R.permute(0, 2, 1)
        new_R *= sign.to(new_R.device)
    # right_mm to right mm
    elif (not left_mm_ex_dst) and (not left_mm_ex_src):
        new_R *= sign.to(new_R.device)
    # left mm to left mm
    elif left_mm_ex_src and left_mm_ex_dst:
        new_R *= sign.view(3, 1).to(new_R.device)
    new_T *= sign.to(new_T.device)

    # convert extrinsic to as definition
    if view_to_world_dst is True:
        new_R, new_T = convert_world_view(new_R, new_T)

    # in ndc or in screen
    if in_ndc_dst is False and in_ndc_src is True:
        assert resolution_dst is not None, \
            'dst in screen, should specify resolution_dst.'

    if in_ndc_src is False and in_ndc_dst is True:
        assert resolution_src is not None, \
            'src in screen, should specify resolution_src.'
    # Fallback used when a resolution is not needed / not given.
    if resolution_dst is None:
        resolution_dst = 2.0
    if resolution_src is None:
        resolution_src = 2.0

    if new_K is not None:
        # Work on the left-mm layout internally.
        if left_mm_in_src is False and left_mm_in_dst is True:
            new_K = new_K.permute(0, 2, 1)
        if new_K.shape[-2:] == (3, 3):
            new_K = convert_K_3x3_to_4x4(new_K, is_perspective)

        # src in ndc, dst in screen
        if in_ndc_src is True and (in_ndc_dst is False):
            new_K = convert_ndc_to_screen(
                K=new_K,
                is_perspective=is_perspective,
                sign=sign.to(new_K.device),
                resolution=resolution_dst)
        # src in screen, dst in ndc
        elif in_ndc_src is False and in_ndc_dst is True:
            new_K = convert_screen_to_ndc(
                K=new_K,
                is_perspective=is_perspective,
                sign=sign.to(new_K.device),
                resolution=resolution_src)
        # src in ndc, dst in ndc
        elif in_ndc_src is True and in_ndc_dst is True:
            # Only the principal point changes sign with the axes.
            if is_perspective:
                new_K[:, 0, 2] *= sign[0].to(new_K.device)
                new_K[:, 1, 2] *= sign[1].to(new_K.device)
            else:
                new_K[:, 0, 3] *= sign[0].to(new_K.device)
                new_K[:, 1, 3] *= sign[1].to(new_K.device)
        # src in screen, dst in screen
        else:
            pass

        if left_mm_in_src is True and left_mm_in_dst is False:
            new_K = new_K.permute(0, 2, 1)

        # Repeat batch-size-1 entries so K, R and T share one batch size.
        # Note this only happens when K is given.
        num_batch = max(new_K.shape[0], new_R.shape[0], new_T.shape[0])
        if new_K.shape[0] == 1:
            new_K = new_K.repeat(num_batch, 1, 1)
        if new_R.shape[0] == 1:
            new_R = new_R.repeat(num_batch, 1, 1)
        if new_T.shape[0] == 1:
            new_T = new_T.repeat(num_batch, 1)

    if use_numpy:
        if isinstance(new_K, torch.Tensor):
            new_K = new_K.cpu().numpy()
        if isinstance(new_R, torch.Tensor):
            new_R = new_R.cpu().numpy()
        if isinstance(new_T, torch.Tensor):
            new_T = new_T.cpu().numpy()
    return new_K, new_R, new_T


def convert_K_3x3_to_4x4(
        K: Union[torch.Tensor, np.ndarray],
        is_perspective: bool = True) -> Union[torch.Tensor, np.ndarray]:
    """Convert opencv 3x3 intrinsic matrix to 4x4.

    Args:
        K (Union[torch.Tensor, np.ndarray]):
            Input 3x3 intrinsic matrix, left mm defined.
            Shape (3, 3) or (batch, 3, 3).
            [[fx,   0,   px],
             [0,   fy,   py],
             [0,    0,   1]]
        is_perspective (bool, optional): whether is perspective projection.
            Defaults to True.

    Raises:
        TypeError: K is not `Tensor` or `array`.
        ValueError: K is neither 2- nor 3-dimensional.

    Returns:
        Union[torch.Tensor, np.ndarray]:
            Output intrinsic matrix of shape (batch, 4, 4), same array
            library as the input. For `torch.Tensor` inputs the result
            keeps the device of K and, for floating point K, its dtype.
            `np.ndarray` results are float64. If K already has a trailing
            (4, 4) shape, a warning is issued and a copy of K is returned
            unchanged.
            for perspective:
                [[fx,   0,    px,   0],
                [0,   fy,    py,   0],
                [0,    0,    0,    1],
                [0,    0,    1,    0]]

            for orthographics:
                [[fx,   0,    0,   px],
                [0,   fy,    0,   py],
                [0,    0,    1,    0],
                [0,    0,    0,    1]]
    """
    if isinstance(K, torch.Tensor):
        K = K.clone()
    elif isinstance(K, np.ndarray):
        K = K.copy()
    else:
        raise TypeError('K should be `torch.Tensor` or `numpy.ndarray`, '
                        f'type(K): {type(K)}.')
    if K.shape[-2:] == (4, 4):
        warnings.warn(
            f'shape of K already is {K.shape}, will pass converting.')
        return K
    if K.ndim == 2:
        K = K[None].reshape(-1, 3, 3)
    elif K.ndim == 3:
        K = K.reshape(-1, 3, 3)
    else:
        raise ValueError(f'Wrong ndim of K: {K.ndim}')

    num_batch = K.shape[0]
    if isinstance(K, np.ndarray):
        if is_perspective:
            K_ = np.zeros((num_batch, 4, 4))
        else:
            K_ = np.eye(4, 4)[None].repeat(num_batch, 0)
    else:
        # Allocate next to the input: a float64 or non-CPU K must not be
        # silently turned into a float32 CPU tensor.
        if K.is_floating_point():
            dtype = K.dtype
        else:
            dtype = torch.get_default_dtype()
        if is_perspective:
            K_ = torch.zeros(num_batch, 4, 4, dtype=dtype, device=K.device)
        else:
            K_ = torch.eye(
                4, 4, dtype=dtype, device=K.device)[None].repeat(
                    num_batch, 1, 1)

    if is_perspective:
        # Copy focal lengths / principal point, then swap the last two
        # rows of the homogeneous part.
        K_[:, :2, :3] = K[:, :2, :3]
        K_[:, 3, 2] = 1
        K_[:, 2, 3] = 1
    else:
        # Focal lengths stay on the diagonal, the principal point moves
        # to the last column.
        K_[:, :2, :2] = K[:, :2, :2]
        K_[:, :2, 3:] = K[:, :2, 2:]
    return K_


def convert_K_4x4_to_3x3(
        K: Union[torch.Tensor, np.ndarray],
        is_perspective: bool = True) -> Union[torch.Tensor, np.ndarray]:
    """Convert opencv 4x4 intrinsic matrix to 3x3.

    Args:
        K (Union[torch.Tensor, np.ndarray]):
            Input 4x4 intrinsic matrix, left mm defined.
            Shape (4, 4) or (batch, 4, 4).
            for perspective:
                [[fx,   0,    px,   0],
                [0,   fy,    py,   0],
                [0,    0,    0,    1],
                [0,    0,    1,    0]]

            for orthographics:
                [[fx,   0,    0,   px],
                [0,   fy,    0,   py],
                [0,    0,    1,    0],
                [0,    0,    0,    1]]
        is_perspective (bool, optional): whether is perspective projection.
            Defaults to True.

    Raises:
        TypeError: type K should be `Tensor` or `array`.
        ValueError: K is neither 2- nor 3-dimensional.

    Returns:
        Union[torch.Tensor, np.ndarray]:
            Output 3x3 intrinsic matrix of shape (batch, 3, 3), left mm
            defined, same array library as the input. For `torch.Tensor`
            inputs the result keeps the device of K and, for floating
            point K, its dtype. `np.ndarray` results are float64. If K
            already has a trailing (3, 3) shape, a warning is issued and a
            copy of K is returned unchanged.
            [[fx,   0,   px],
             [0,   fy,   py],
             [0,    0,   1]]
    """

    if isinstance(K, torch.Tensor):
        K = K.clone()
    elif isinstance(K, np.ndarray):
        K = K.copy()
    else:
        raise TypeError('K should be `torch.Tensor` or `numpy.ndarray`, '
                        f'type(K): {type(K)}.')
    if K.shape[-2:] == (3, 3):
        warnings.warn(
            f'shape of K already is {K.shape}, will pass converting.')
        return K
    if K.ndim == 2:
        K = K[None].reshape(-1, 4, 4)
    elif K.ndim == 3:
        K = K.reshape(-1, 4, 4)
    else:
        raise ValueError(f'Wrong ndim of K: {K.ndim}')

    num_batch = K.shape[0]
    if isinstance(K, np.ndarray):
        K_ = np.eye(3, 3)[None].repeat(num_batch, 0)
    else:
        # Allocate next to the input: a float64 or non-CPU K must not be
        # silently turned into a float32 CPU tensor.
        if K.is_floating_point():
            dtype = K.dtype
        else:
            dtype = torch.get_default_dtype()
        K_ = torch.eye(
            3, 3, dtype=dtype, device=K.device)[None].repeat(num_batch, 1, 1)

    if is_perspective:
        # fx, fy, px, py all live in the upper-left 2x3 block.
        K_[:, :2, :3] = K[:, :2, :3]
    else:
        # The principal point sits in the last column of the 4x4 matrix.
        K_[:, :2, :2] = K[:, :2, :2]
        K_[:, :2, 2:3] = K[:, :2, 3:4]
    return K_


def convert_ndc_to_screen(
        K: Union[torch.Tensor, np.ndarray],
        resolution: Union[int, Tuple[int, int], List[int], torch.Tensor,
                          np.ndarray],
        sign: Optional[Iterable[int]] = None,
        is_perspective: bool = True) -> Union[torch.Tensor, np.ndarray]:
    """Convert intrinsic matrix from ndc to screen.

    Args:
        K (Union[torch.Tensor, np.ndarray]):
            Input 4x4 intrinsic matrix, left mm defined.
            Shape (4, 4) or (batch, 4, 4). The input is not modified.
        resolution (Union[int, Tuple[int, int], torch.Tensor, np.ndarray]):
            (height, width) of image. A scalar means a square image. An
            array or tensor is reshaped to (-1, 2); it may hold one
            (height, width) pair or one pair per batch element, and is
            converted to the array library of K.
        sign (Optional[Union[Iterable[int]]], optional): xyz axis sign.
            The first two entries multiply the principal point before it
            is mapped to pixels. Defaults to None, meaning [1, 1, 1].
        is_perspective (bool, optional): whether is perspective projection.
            Selects whether the principal point is read from column 2
            (perspective) or column 3 (orthographic).
            Defaults to True.

    Raises:
        TypeError: K should be Tensor or array; resolution should be a
            scalar, list, tuple, Tensor or array.
        ValueError: K is neither 2- nor 3-dimensional.

    Returns:
        Union[torch.Tensor, np.ndarray]: output intrinsic matrix of shape
            (batch, 4, 4).
    """
    sign = [1, 1, 1] if sign is None else sign
    if isinstance(K, torch.Tensor):
        K = K.clone()
    elif isinstance(K, np.ndarray):
        K = K.copy()
    else:
        raise TypeError(
            f'K should be `torch.Tensor` or `np.ndarray`, type(K): {type(K)}')
    if K.ndim == 2:
        K = K[None].reshape(-1, 4, 4)
    elif K.ndim == 3:
        K = K.reshape(-1, 4, 4)
    else:
        raise ValueError(f'Wrong ndim of K: {K.ndim}')

    if isinstance(resolution, (int, float, np.integer, np.floating)):
        w_dst = h_dst = resolution
    elif isinstance(resolution, (list, tuple)):
        h_dst, w_dst = resolution
    elif isinstance(resolution, (torch.Tensor, np.ndarray)):
        # Bring the resolution into the same array library (and device)
        # as K so the in-place arithmetic below is well defined.
        if isinstance(K, torch.Tensor):
            resolution = torch.as_tensor(resolution, device=K.device)
        elif isinstance(resolution, torch.Tensor):
            resolution = resolution.detach().cpu().numpy()
        resolution = resolution.reshape(-1, 2)
        h_dst, w_dst = resolution[:, 0], resolution[:, 1]
    else:
        raise TypeError(
            'resolution should be a scalar, list, tuple, `torch.Tensor` or '
            f'`np.ndarray`, type(resolution): {type(resolution)}')

    aspect_ratio = w_dst / h_dst
    K[:, 0, 0] *= w_dst / 2
    K[:, 1, 1] *= h_dst / 2
    # Wide images shrink fx, tall images shrink fy. For batched
    # resolutions do this element-wise: dividing by max(ratio, 1) and
    # multiplying by min(ratio, 1) equals the scalar if/else below.
    if isinstance(aspect_ratio, torch.Tensor):
        K[:, 0, 0] /= torch.clamp(aspect_ratio, min=1.0)
        K[:, 1, 1] *= torch.clamp(aspect_ratio, max=1.0)
    elif isinstance(aspect_ratio, np.ndarray):
        K[:, 0, 0] /= np.maximum(aspect_ratio, 1.0)
        K[:, 1, 1] *= np.minimum(aspect_ratio, 1.0)
    elif aspect_ratio > 1:
        K[:, 0, 0] /= aspect_ratio
    else:
        K[:, 1, 1] *= aspect_ratio

    # Principal point: flip by axis sign, then map [-1, 1] to pixels.
    col = 2 if is_perspective else 3
    K[:, 0, col] *= sign[0]
    K[:, 1, col] *= sign[1]
    K[:, 0, col] = (K[:, 0, col] + 1) * (w_dst / 2)
    K[:, 1, col] = (K[:, 1, col] + 1) * (h_dst / 2)
    return K


def convert_screen_to_ndc(
        K: Union[torch.Tensor, np.ndarray],
        resolution: Union[int, Tuple[int, int], List[int], torch.Tensor,
                          np.ndarray],
        sign: Optional[Iterable[int]] = None,
        is_perspective: bool = True) -> Union[torch.Tensor, np.ndarray]:
    """Convert intrinsic matrix from screen to ndc.

    Args:
        K (Union[torch.Tensor, np.ndarray]): input intrinsic matrix,
            shape (4, 4) or (batch, 4, 4). The input is not modified.
        resolution (Union[int, Tuple[int, int], torch.Tensor, np.ndarray]):
            (height, width) of image. A scalar means a square image. An
            array or tensor is reshaped to (-1, 2); it may hold one
            (height, width) pair or one pair per batch element, and is
            converted to the array library of K.
        sign (Optional[Union[Iterable[int]]], optional): xyz axis sign.
            The first two entries multiply the principal point after it
            is mapped to ndc. Defaults to None, meaning [1, 1, 1].
        is_perspective (bool, optional): whether is perspective projection.
            Selects whether the principal point is read from column 2
            (perspective) or column 3 (orthographic).
            Defaults to True.

    Raises:
        TypeError: K should be Tensor or array; resolution should be a
            scalar, list, tuple, Tensor or array.
        ValueError: K is neither 2- nor 3-dimensional.

    Returns:
        Union[torch.Tensor, np.ndarray]: output intrinsic matrix of shape
            (batch, 4, 4).
    """
    if sign is None:
        sign = [1, 1, 1]

    if isinstance(K, torch.Tensor):
        K = K.clone()
    elif isinstance(K, np.ndarray):
        K = K.copy()
    else:
        raise TypeError(
            f'K should be `torch.Tensor` or `np.ndarray`, type(K): {type(K)}')
    if K.ndim == 2:
        K = K[None].reshape(-1, 4, 4)
    elif K.ndim == 3:
        K = K.reshape(-1, 4, 4)
    else:
        raise ValueError(f'Wrong ndim of K: {K.ndim}')

    if isinstance(resolution, (int, float, np.integer, np.floating)):
        w_src = h_src = resolution
    elif isinstance(resolution, (list, tuple)):
        h_src, w_src = resolution
    elif isinstance(resolution, (torch.Tensor, np.ndarray)):
        # Bring the resolution into the same array library (and device)
        # as K so the in-place arithmetic below is well defined.
        if isinstance(K, torch.Tensor):
            resolution = torch.as_tensor(resolution, device=K.device)
        elif isinstance(resolution, torch.Tensor):
            resolution = resolution.detach().cpu().numpy()
        resolution = resolution.reshape(-1, 2)
        h_src, w_src = resolution[:, 0], resolution[:, 1]
    else:
        raise TypeError(
            'resolution should be a scalar, list, tuple, `torch.Tensor` or '
            f'`np.ndarray`, type(resolution): {type(resolution)}')

    aspect_ratio = w_src / h_src
    K[:, 0, 0] /= w_src / 2
    K[:, 1, 1] /= h_src / 2
    # Wide images enlarge fx, tall images enlarge fy. For batched
    # resolutions do this element-wise: multiplying by max(ratio, 1) and
    # dividing by min(ratio, 1) equals the scalar if/else below.
    if isinstance(aspect_ratio, torch.Tensor):
        K[:, 0, 0] *= torch.clamp(aspect_ratio, min=1.0)
        K[:, 1, 1] /= torch.clamp(aspect_ratio, max=1.0)
    elif isinstance(aspect_ratio, np.ndarray):
        K[:, 0, 0] *= np.maximum(aspect_ratio, 1.0)
        K[:, 1, 1] /= np.minimum(aspect_ratio, 1.0)
    elif aspect_ratio > 1:
        K[:, 0, 0] *= aspect_ratio
    else:
        K[:, 1, 1] /= aspect_ratio

    # Principal point: map pixels to [-1, 1], then flip by axis sign.
    col = 2 if is_perspective else 3
    K[:, 0, col] = K[:, 0, col] / (w_src / 2) - 1
    K[:, 1, col] = K[:, 1, col] / (h_src / 2) - 1
    K[:, 0, col] *= sign[0]
    K[:, 1, col] *= sign[1]
    return K


def convert_world_view(
    R: Union[torch.Tensor, np.ndarray], T: Union[torch.Tensor, np.ndarray]
) -> Tuple[Union[torch.Tensor, np.ndarray], Union[torch.Tensor, np.ndarray]]:
    """Convert between view_to_world and world_to_view defined extrinsic
    matrix.

    The rotation is transposed and the translation becomes ``-(R^T @ T)``,
    so applying the function twice gives back the input.

    Args:
        R (Union[torch.Tensor, np.ndarray]): extrinsic rotation matrix.
            shape should be (batch, 3, 3); a single (3, 3) matrix is also
            accepted.
        T (Union[torch.Tensor, np.ndarray]): extrinsic translation matrix.
            Any shape that can be reshaped to (-1, 3).

    Raises:
        TypeError: R and T should be of the same type, and that type
            should be `torch.Tensor` or `np.ndarray`.

    Returns:
        Tuple[Union[torch.Tensor, np.ndarray], Union[torch.Tensor,
            np.ndarray]]: output R with the shape of the input R, and
            output T of shape (batch, 3). The inputs are not modified.
    """
    if not (type(R) is type(T)):
        raise TypeError(
            f'R: {type(R)}, T: {type(T)} should have the same type.')
    if isinstance(R, torch.Tensor):
        # Transpose the last two axes of a copy so that both batched and
        # single matrices work and the caller's tensor is left untouched.
        R = R.clone().transpose(-1, -2)
        T = -(R @ T.reshape(-1, 3, 1)).reshape(-1, 3)
    elif isinstance(R, np.ndarray):
        R = np.swapaxes(R.copy(), -1, -2)
        T = -(R @ T.reshape(-1, 3, 1)).reshape(-1, 3)
    else:
        raise TypeError(f'R: {type(R)}, T: {type(T)} should be torch.Tensor '
                        f'or numpy.ndarray.')
    return R, T