54 changes: 50 additions & 4 deletions test/test_transforms_v2.py
@@ -5996,7 +5996,19 @@ def test_kernel_image(self, dtype, device):
def test_kernel_video(self):
check_kernel(F.adjust_sharpness_video, make_video(), sharpness_factor=0.5)

@pytest.mark.parametrize("make_input", [make_image_tensor, make_image, make_image_pil, make_video])
@pytest.mark.parametrize(
"make_input",
[
make_image_tensor,
make_image,
make_image_pil,
make_video,
pytest.param(
make_image_cvcuda,
marks=pytest.mark.needs_cvcuda,
),
],
)
def test_functional(self, make_input):
check_functional(F.adjust_sharpness, make_input(), sharpness_factor=0.5)

@@ -6007,12 +6019,31 @@ def test_functional(self, make_input):
(F._color._adjust_sharpness_image_pil, PIL.Image.Image),
(F.adjust_sharpness_image, tv_tensors.Image),
(F.adjust_sharpness_video, tv_tensors.Video),
pytest.param(
F._color._adjust_sharpness_image_cvcuda,
None,
marks=pytest.mark.needs_cvcuda,
),
],
)
def test_functional_signature(self, kernel, input_type):
if kernel is F._color._adjust_sharpness_image_cvcuda:
input_type = _import_cvcuda().Tensor
check_functional_kernel_signature_match(F.adjust_sharpness, kernel=kernel, input_type=input_type)

@pytest.mark.parametrize("make_input", [make_image_tensor, make_image_pil, make_image, make_video])
@pytest.mark.parametrize(
"make_input",
[
make_image_tensor,
make_image_pil,
make_image,
make_video,
pytest.param(
make_image_cvcuda,
marks=pytest.mark.needs_cvcuda,
),
],
)
def test_transform(self, make_input):
check_transform(transforms.RandomAdjustSharpness(sharpness_factor=0.5, p=1), make_input())

@@ -6024,13 +6055,28 @@ def test_functional_error(self):
F.adjust_sharpness(make_image(), sharpness_factor=-1)

@pytest.mark.parametrize("sharpness_factor", [0.1, 0.5, 1.0])
@pytest.mark.parametrize(
"make_input",
[
make_image,
pytest.param(
make_image_cvcuda,
marks=pytest.mark.needs_cvcuda,
),
],
)
@pytest.mark.parametrize(
"fn", [F.adjust_sharpness, transform_cls_to_functional(transforms.RandomAdjustSharpness, p=1)]
)
def test_correctness_image(self, sharpness_factor, fn):
image = make_image(dtype=torch.uint8, device="cpu")
def test_correctness_image(self, sharpness_factor, make_input, fn):
image = make_input(dtype=torch.uint8, device="cpu")

actual = fn(image, sharpness_factor=sharpness_factor)

if make_input == make_image_cvcuda:
actual = F.cvcuda_to_tensor(actual)[0].cpu()
image = F.cvcuda_to_tensor(image)[0].cpu()

expected = F.to_image(F.adjust_sharpness(F.to_pil_image(image), sharpness_factor=sharpness_factor))

assert_equal(actual, expected)
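Note: the needs_cvcuda mark used in the parametrizations above lets the CV-CUDA cases skip cleanly when the package is absent. A minimal sketch of how such a mark could be wired up in a conftest.py (hypothetical; torchvision's actual registration may differ):

import pytest

def pytest_configure(config):
    config.addinivalue_line("markers", "needs_cvcuda: test requires the cvcuda package")

def pytest_collection_modifyitems(config, items):
    # Skip every test carrying the mark when cvcuda cannot be imported.
    try:
        import cvcuda  # noqa: F401
    except ImportError:
        skip = pytest.mark.skip(reason="cvcuda is not installed")
        for item in items:
            if "needs_cvcuda" in item.keywords:
                item.add_marker(skip)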
3 changes: 3 additions & 0 deletions torchvision/transforms/v2/_color.py
@@ -5,6 +5,7 @@
import torch
from torchvision import transforms as _transforms
from torchvision.transforms.v2 import functional as F, Transform
from torchvision.transforms.v2.functional._utils import _is_cvcuda_tensor

from ._transform import _RandomApplyTransform
from ._utils import query_chw
@@ -369,6 +370,8 @@ class RandomAdjustSharpness(_RandomApplyTransform):

_v1_transform_cls = _transforms.RandomAdjustSharpness

_transformed_types = _RandomApplyTransform._transformed_types + (_is_cvcuda_tensor,)

def __init__(self, sharpness_factor: float, p: float = 0.5) -> None:
super().__init__(p=p)
self.sharpness_factor = sharpness_factor
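Note: _transformed_types may contain plain predicates alongside classes, which is what lets the _is_cvcuda_tensor entry above work without importing cvcuda eagerly. A rough sketch of that dispatch, with the helper's assumed behavior (the real check_type lives in torchvision.transforms.v2.functional._utils):

def _is_cvcuda_tensor(obj):
    # Assumed behavior of the helper: True only when cvcuda is importable
    # and obj is a cvcuda.Tensor.
    try:
        import cvcuda
    except ImportError:
        return False
    return isinstance(obj, cvcuda.Tensor)

def check_type(obj, types_or_checks):
    # Classes are matched with isinstance; anything else is called as a predicate.
    for type_or_check in types_or_checks:
        if isinstance(type_or_check, type):
            if isinstance(obj, type_or_check):
                return True
        elif type_or_check(obj):
            return True
    return False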
5 changes: 3 additions & 2 deletions torchvision/transforms/v2/_utils.py
@@ -16,7 +16,7 @@

from torchvision.transforms.transforms import _check_sequence_input, _setup_angle, _setup_size # noqa: F401
from torchvision.transforms.v2.functional import get_dimensions, get_size, is_pure_tensor
from torchvision.transforms.v2.functional._utils import _FillType, _FillTypeJIT
from torchvision.transforms.v2.functional._utils import _FillType, _FillTypeJIT, _is_cvcuda_tensor


def _setup_number_or_seq(arg: int | float | Sequence[int | float], name: str) -> Sequence[float]:
@@ -182,7 +182,7 @@ def query_chw(flat_inputs: list[Any]) -> tuple[int, int, int]:
chws = {
tuple(get_dimensions(inpt))
for inpt in flat_inputs
if check_type(inpt, (is_pure_tensor, tv_tensors.Image, PIL.Image.Image, tv_tensors.Video))
if check_type(inpt, (is_pure_tensor, tv_tensors.Image, PIL.Image.Image, tv_tensors.Video, _is_cvcuda_tensor))
}
if not chws:
raise TypeError("No image or video was found in the sample")
@@ -207,6 +207,7 @@ def query_size(flat_inputs: list[Any]) -> tuple[int, int]:
tv_tensors.Mask,
tv_tensors.BoundingBoxes,
tv_tensors.KeyPoints,
_is_cvcuda_tensor,
),
)
}
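Note: for query_chw and query_size to accept CV-CUDA inputs, get_dimensions and get_size must also understand the NHWC layout the kernels below use. A hypothetical sketch of such a size kernel (name and registration assumed; they are not part of this diff):

def _get_size_image_cvcuda(image) -> list[int]:
    # CV-CUDA tensors in this integration are NHWC, so H and W sit in the middle.
    n, h, w, c = image.shape
    return [h, w]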
92 changes: 91 additions & 1 deletion torchvision/transforms/v2/functional/_color.py
@@ -1,3 +1,5 @@
from typing import TYPE_CHECKING

import PIL.Image
import torch
from torch.nn.functional import conv2d
@@ -9,7 +11,13 @@

from ._misc import _num_value_bits, to_dtype_image
from ._type_conversion import pil_to_tensor, to_pil_image
from ._utils import _get_kernel, _register_kernel_internal
from ._utils import _get_kernel, _import_cvcuda, _is_cvcuda_available, _register_kernel_internal


CVCUDA_AVAILABLE = _is_cvcuda_available()

if TYPE_CHECKING:
import cvcuda # type: ignore[import-not-found]


def rgb_to_grayscale(inpt: torch.Tensor, num_output_channels: int = 1) -> torch.Tensor:
@@ -286,6 +294,88 @@ def adjust_sharpness_video(video: torch.Tensor, sharpness_factor: float) -> torch.Tensor:
return adjust_sharpness_image(video, sharpness_factor=sharpness_factor)


_max_value_map: dict["cvcuda.Type", float | int] = {}
_dtype_to_format: dict[tuple["cvcuda.Type", int], "cvcuda.Format"] = {}


def _adjust_sharpness_image_cvcuda(
image: "cvcuda.Tensor",
sharpness_factor: float,
) -> "cvcuda.Tensor":
cvcuda = _import_cvcuda()

if len(_max_value_map) == 0:
_max_value_map[cvcuda.Type.U8] = 255
_max_value_map[cvcuda.Type.F32] = 1.0
if len(_dtype_to_format) == 0:
_dtype_to_format[(cvcuda.Type.U8, 1)] = cvcuda.Format.U8
_dtype_to_format[(cvcuda.Type.U8, 3)] = cvcuda.Format.RGB8
_dtype_to_format[(cvcuda.Type.F32, 1)] = cvcuda.Format.F32
_dtype_to_format[(cvcuda.Type.F32, 3)] = cvcuda.Format.RGBf32

if sharpness_factor < 0:
raise ValueError(f"sharpness_factor ({sharpness_factor}) is not non-negative.")

n, h, w, c = image.shape
if c not in (1, 3):
raise TypeError(f"Input image tensor can have 1 or 3 channels, but found {c}")

if h <= 2 or w <= 2:
return image

# grab the constants as in the torchvision tensor kernel
bound = _max_value_map[image.dtype]
fp = image.dtype == cvcuda.Type.F32
img_format = _dtype_to_format.get((image.dtype, c))
if img_format is None:
raise TypeError(f"Unsupported dtype/channel combination: {image.dtype}, {c} channels")

# conv2d requires ImageBatchVarShape, so we split the batch into individual images
# CV-CUDA has no split op, so zero-copy into torch and split there
batch = cvcuda.ImageBatchVarShape(capacity=n)
for tensor in torch.as_tensor(image.cuda()).split(1, dim=0):
cv_image = cvcuda.as_image(tensor, format=img_format)
batch.pushback(cv_image)

# create the same 3x3 smoothing kernel that adjust_sharpness_image uses
a, b = 1.0 / 13.0, 5.0 / 13.0
torch_kernel = torch.tensor([[a, a, a], [a, b, a], [a, a, a]], dtype=torch.float32, device="cuda")
kernel_batch = cvcuda.ImageBatchVarShape(capacity=n)
for _ in range(n):
kernel_batch.pushback(cvcuda.as_image(torch_kernel, format=cvcuda.Format.F32))

# kernel anchors for cvcuda; [-1, -1] selects the kernel center
anchor_data = torch.tensor([[-1, -1]] * n, dtype=torch.int32, device="cuda")
anchor = cvcuda.as_tensor(anchor_data, "NC")

# run the sharpen operator using cvcuda.conv2d
sharpened_batch = cvcuda.conv2d(batch, kernel=kernel_batch, kernel_anchor=anchor, border=cvcuda.Border.REPLICATE)
sharpened_list = []
for sharpened_img in sharpened_batch:
tensor = cvcuda.as_tensor(sharpened_img.cuda(), cvcuda.TensorLayout.HWC)
sharpened_list.append(tensor)
sharpened = cvcuda.stack(sharpened_list)

# handle the final blend with zero-copy torch views, mirroring adjust_sharpness_image
blurred_degenerate = torch.as_tensor(sharpened.cuda())
output = torch.as_tensor(image.cuda()).to(dtype=torch.float32, copy=True)
if not fp:
blurred_degenerate = blurred_degenerate.round()
view = output[:, 1:-1, 1:-1, :]
blurred_inner = blurred_degenerate[:, 1:-1, 1:-1, :]
view.add_(blurred_inner.sub(view), alpha=(1.0 - sharpness_factor))
output = output.clamp_(0, bound)
if not fp:
output = output.to(torch.uint8)

# convert back to cvcuda.Tensor
return cvcuda.as_tensor(output.contiguous(), cvcuda.TensorLayout.NHWC)


if CVCUDA_AVAILABLE:
_register_kernel_internal(adjust_sharpness, _import_cvcuda().Tensor)(_adjust_sharpness_image_cvcuda)


def adjust_hue(inpt: torch.Tensor, hue_factor: float) -> torch.Tensor:
"""Adjust hue"""
if torch.jit.is_scripting():
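Note: the in-place update view.add_(blurred_inner.sub(view), alpha=(1.0 - sharpness_factor)) is the same linear interpolation the tensor kernel applies through _blend, since img + (blurred - img) * (1 - s) == s * img + (1 - s) * blurred. A minimal, self-contained restatement in plain torch (no CV-CUDA required):

import torch

def blend(img: torch.Tensor, blurred: torch.Tensor, sharpness_factor: float) -> torch.Tensor:
    # img + (blurred - img) * (1 - s)  ==  s * img + (1 - s) * blurred
    return img.add(blurred.sub(img), alpha=(1.0 - sharpness_factor))

img = torch.rand(1, 3, 8, 8)
blurred = torch.rand(1, 3, 8, 8)
s = 0.5
assert torch.allclose(blend(img, blurred, s), s * img + (1 - s) * blurred)

Only the interior view output[:, 1:-1, 1:-1, :] is blended, which matches the tensor kernel's behavior of leaving the one-pixel border of the input untouched.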