代码收藏家技术教程 2024-07-16

用Python将深度图和RGB图像转换为点云

1、深度图（.png）转点云（有内参）

import numpy as np
import open3d as o3d
from PIL import Image

def depth_to_point_cloud(depth_map, fx, fy, cx, cy):
    # h, w = depth_map.shape
    h, w = 1184, 1600
    points = []
    for v in range(h):
        for u in range(w):
            Z = depth_map[v, u]
            X = (u - cx) * Z / fx
            Y = (v - cy) * Z / fy
            points.append([X, Y, Z])
    return np.array(points)

depth_path = r'./depth.png'
depth_map = Image.open(depth_path).convert("L")  # 替换成自己的路径
depth_map = np.array(depth_map)

fx = 2892.33
fy = 2883.18
cx = 823.205
cy = 619.071

points = depth_to_point_cloud(depth_map, fx, fy, cx, cy)
# print(points)
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(points)
o3d.io.write_point_cloud('./output.ply', pcd)
# o3d.io.write_point_cloud("./output.pcd", pcd)

2、深度图（.pfm）转点云（有内参）

import re
import numpy as np
import open3d as o3d

def read_pfm(file_path):
    with open(file_path, 'rb') as f:
        color = None
        width = None
        height = None
        scale = None
        endian = None

        header = f.readline().decode('utf-8').rstrip()
        if header == 'PF':
            color = True
        elif header == 'Pf':
            color = False
        else:
            raise Exception('Not a PFM file.')

        dim_match = re.match(r'^(\d+)\s(\d+)\s$', f.readline().decode('utf-8'))
        if dim_match:
            width, height = map(int, dim_match.groups())
        else:
            raise Exception('Malformed PFM header.')

        scale = float(f.readline().decode('utf-8').rstrip())
        if scale < 0:
            endian = '<'
            scale = -scale
        else:
            endian = '>'

        depth_data = np.fromfile(f, endian + 'f')
        shape = (height, width, 3) if color else (height, width)

        depth_data = np.reshape(depth_data, shape)
        depth_data = np.flipud(depth_data)
        return depth_data

def depth_to_point_cloud(depth_map, fx, fy, cx, cy):
    h, w = depth_map.shape
    points = []
    for v in range(h):
        for u in range(w):
            Z = depth_map[v, u]
            X = (u - cx) * Z / fx
            Y = (v - cy) * Z / fy
            points.append([X, Y, Z])
    return np.array(points)

# 读取深度图
depth_path = r"./depth.pfm"
depth_map = read_pfm(depth_path)

fx = 2892.33
fy = 2883.18
cx = 823.205
cy = 619.071

# 转换为点云
points = depth_to_point_cloud(depth_map, fx, fy, cx, cy)
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(points)
o3d.io.write_point_cloud("./output.ply", pcd)
# o3d.io.write_point_cloud("./output.pcd", pcd)

3、深度图（.png）+RGB图像转点云（无内参）

# 转换需要depth图像和rgb图像尺寸一致，若不一致需要先resize成一样的大小
from PIL import Image
img = Image.open('./depth.png')
resized_img = img.resize((1600, 1184))
resized_img.save('./depth.png')


import open3d as o3d
import matplotlib.pyplot as plt  # plt 用于显示图片
import numpy as np

rgb_path = r'./rgb.png'
depth_path = r'./depth.png'

color_raw = o3d.io.read_image(rgb_path)
depth_raw = o3d.io.read_point_cloud(depth_path)
rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(color_raw, depth_raw, depth_scale=1000.0,depth_trunc=3, convert_rgb_to_intensity=False)
plt.subplot(1, 2, 1)
plt.title('read_depth')
plt.imshow(rgbd_image.color)
plt.subplot(1, 2, 2)
plt.title('depth image')
plt.imshow(rgbd_image.depth)
plt.show()
# 若要查看自己的深度图值是多少，使用下面的np函数显示
print(np.asarray(rgbd_image.depth))
pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, o3d.camera.PinholeCameraIntrinsic(
    o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault))
pcd.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
# o3d.visualization.draw_geometries([pcd])
o3d.io.write_point_cloud('./output.ply', pcd)

4、深度图（.pfm）+RGB图像转点云（有内参）

import numpy as np
import open3d as o3d
import cv2

def read_pfm(fpath, expected_identifier="Pf"):
    # PFM format definition: http://netpbm.sourceforge.net/doc/pfm.html

    def _get_next_line(f):
        next_line = f.readline().decode('utf-8').rstrip()
        # ignore comments
        while next_line.startswith('#'):
            next_line = f.readline().rstrip()
        return next_line

    with open(fpath, 'rb') as f:
        #  header
        identifier = _get_next_line(f)
        if identifier != expected_identifier:
            raise Exception('Unknown identifier. Expected: "%s", got: "%s".' % (expected_identifier, identifier))

        try:
            line_dimensions = _get_next_line(f)
            dimensions = line_dimensions.split(' ')
            width = int(dimensions[0].strip())
            height = int(dimensions[1].strip())
        except:
            raise Exception('Could not parse dimensions: "%s". '
                            'Expected "width height", e.g. "512 512".' % line_dimensions)

        try:
            line_scale = _get_next_line(f)
            scale = float(line_scale)
            assert scale != 0
            if scale < 0:
                endianness = "<"
            else:
                endianness = ">"
        except:
            raise Exception('Could not parse max value / endianess information: "%s". '
                            'Should be a non-zero number.' % line_scale)

        try:
            data = np.fromfile(f, "%sf" % endianness)
            data = np.reshape(data, (height, width))
            data = np.flipud(data)
            with np.errstate(invalid="ignore"):
                data *= abs(scale)
        except:
            raise Exception('Invalid binary values. Could not create %dx%d array from input.' % (height, width))

        return data

def reconstruct_point_cloud(rgb_image, depth_image, fx, fy, cx, cy):
    # 获取图像尺寸
    height, width = depth_image.shape

    # 创建点云坐标
    y_coords, x_coords = np.indices((height, width))
    x_coords = x_coords.reshape(-1)
    y_coords = y_coords.reshape(-1)
    z_coords = depth_image.reshape(-1)
    points_x = []
    points_y = []
    points_z = []
    colors = []
    for x, y in zip(x_coords, y_coords):
        [b, g, r] = rgb_image[y][x]  #  rgb是三通道的BGR格式图，所以读取顺序要顺序留意。
        if r == 0 and g == 0 and b == 0:
            continue
        depth = depth_image[y][x]
        if depth == 0:  # 如果深度值为 0，表示无效值，跳过当前点
            continue
        X = (x - cx) * depth / fx
        Y = (y - cy) * depth / fy
        points_x.append(X)
        # points_y.append(1023 - Y)  # 平移y
        points_y.append(Y)
        points_z.append(depth)
        color = [r / 255.0, g / 255.0, b / 255.0]
        colors.append(color)

    # 创建点云
    points = np.column_stack((points_x, points_y, points_z))
    # colors = rgb_image.reshape(-1, 3) / 255.0
    # print(colors)

    point_cloud = o3d.geometry.PointCloud()
    point_cloud.points = o3d.utility.Vector3dVector(points)
    point_cloud.colors = o3d.utility.Vector3dVector(colors)

    return point_cloud

fx = 2892.33
fy = 2883.18
cx = 823.205
cy = 619.071

rgb_image_path = r'./rgb.png'
depth_image_path = r'./depth.pfm'
save_path = r"./output.ply"

rgb_image = cv2.imread(rgb_image_path)
# depth_image = cv2.imread(depth_image_path, cv2.IMREAD_GRAYSCALE)
depth_image = read_pfm(depth_image_path)
point_cloud = reconstruct_point_cloud(rgb_image, depth_image, fx, fy, cx, cy)
point_size = np.array(point_cloud.points).shape[0]
print(point_size)
o3d.io.write_point_cloud(save_path, point_cloud)
# o3d.visualization.draw_geometries([point_cloud])

5、深度图 pfm格式转为png格式

from PIL import Image
import numpy as np
import imageio.v3 as iio
import numpy as np
import matplotlib.pyplot as plt
import open3d as o3d


def write_pfm(data, fpath, scale=1, file_identifier=b'Pf', dtype="float32"):
    # PFM format definition: http://netpbm.sourceforge.net/doc/pfm.html

    data = np.flipud(data)
    height, width = np.shape(data)[:2]
    values = np.ndarray.flatten(np.asarray(data, dtype=dtype))
    endianess = data.dtype.byteorder
    print(endianess)

    if endianess == '<' or (endianess == '=' and sys.byteorder == 'little'):
        scale *= -1

    with open(fpath, 'wb') as file:
        file.write((file_identifier))
        file.write(('\n%d %d\n' % (width, height)).encode())
        file.write(('%d\n' % scale).encode())

        file.write(values)

def read_pfm(fpath, expected_identifier="Pf"):
    # PFM format definition: http://netpbm.sourceforge.net/doc/pfm.html

    def _get_next_line(f):
        next_line = f.readline().decode('utf-8').rstrip()
        # ignore comments
        while next_line.startswith('#'):
            next_line = f.readline().rstrip()
        return next_line

    with open(fpath, 'rb') as f:
        #  header
        identifier = _get_next_line(f)
        if identifier != expected_identifier:
            raise Exception('Unknown identifier. Expected: "%s", got: "%s".' % (expected_identifier, identifier))

        try:
            line_dimensions = _get_next_line(f)
            dimensions = line_dimensions.split(' ')
            width = int(dimensions[0].strip())
            height = int(dimensions[1].strip())
        except:
            raise Exception('Could not parse dimensions: "%s". '
                            'Expected "width height", e.g. "512 512".' % line_dimensions)

        try:
            line_scale = _get_next_line(f)
            scale = float(line_scale)
            assert scale != 0
            if scale < 0:
                endianness = "<"
            else:
                endianness = ">"
        except:
            raise Exception('Could not parse max value / endianess information: "%s". '
                            'Should be a non-zero number.' % line_scale)

        try:
            data = np.fromfile(f, "%sf" % endianness)
            data = np.reshape(data, (height, width))
            data = np.flipud(data)
            with np.errstate(invalid="ignore"):
                data *= abs(scale)
        except:
            raise Exception('Invalid binary values. Could not create %dx%d array from input.' % (height, width))

        return data

pfm_path = r'/home/czh/CVP-MVSNet_backup/outputs/dtu_dataset-nsrc2/scan1/rgbd2ply/00000000.pfm'
save_path = '/home/czh/CVP-MVSNet_backup/outputs/dtu_dataset-nsrc2/scan1/rgbd2ply/pfm2png000.png'
data = read_pfm(pfm_path)
data_max = np.max(data)
data_min = np.min(data)
print("Max: ", data_max)
print("Min: ", data_min)

depth_instensity = np.array(256 * (data-463) / 512, dtype=np.uint8)  # 这里是改变深度表示的范围，为了可视化。 463为数据中的最小值，相当于把数据范围最小值变为0； 除512是因为 最大值-最小值 的值约等于500 512的表示可以覆盖掉这500个数据 
iio.imwrite(save_path, depth_instensity)

作者：菜园狸花喵