Spaces:

Surn
/

DPTDepth3D

Running

App Files Files Community

Surn committed on Feb 20

Commit

e9b0e9f

1 Parent(s): da61252

Update z axis code

Browse files

Files changed (6) hide show

.gitignore +46 -45
README.md +11 -3
app.py +28 -14
pre-requirements.txt +1 -0
requirements.txt +9 -5
web-ui.bat +1 -1

.gitignore CHANGED Viewed

@@ -1,47 +1,48 @@
-# Python build
-.eggs/
-gradio.egg-info/*
-!gradio.egg-info/requires.txt
-!gradio.egg-info/PKG-INFO
-dist/
-*.pyc
-__pycache__/
-*.py[cod]
-*$py.class
-build/
-# JS build
-gradio/templates/frontend
-# Secrets
-.env
-# Gradio run artifacts
-*.db
-*.sqlite3
-gradio/launches.json
-flagged/
-gradio_cached_examples/
-# Tests
-.coverage
-coverage.xml
-test.txt
-# Demos
-demo/tmp.zip
-demo/files/*.avi
-demo/files/*.mp4
-# Etc
-.idea/*
-.DS_Store
-*.bak
-workspace.code-workspace
-*.h5
-.vscode/
-# log files
-.pnpm-debug.log
-venv/
 *.db-journal
 /.vs

+# Python build
+.eggs/
+gradio.egg-info/*
+!gradio.egg-info/requires.txt
+!gradio.egg-info/PKG-INFO
+dist/
+*.pyc
+__pycache__/
+*.py[cod]
+*$py.class
+build/
+# JS build
+gradio/templates/frontend
+# Secrets
+.env
+# Gradio run artifacts
+*.db
+*.sqlite3
+gradio/launches.json
+flagged/
+gradio_cached_examples/
+# Tests
+.coverage
+coverage.xml
+test.txt
+# Demos
+demo/tmp.zip
+demo/files/*.avi
+demo/files/*.mp4
+models/
+# Etc
+.idea/*
+.DS_Store
+*.bak
+workspace.code-workspace
+*.h5
+.vscode/
+# log files
+.pnpm-debug.log
+venv/
 *.db-journal
 /.vs

README.md CHANGED Viewed

@@ -1,13 +1,21 @@
 ---
 title: DPT Depth Estimation + 3D
 emoji: ⚡
-colorFrom: blue
 colorTo: red
 sdk: gradio
 sdk_version: 5.16.1
 app_file: app.py
-pinned: false
-short_description: Image to 3D with DPT + 3D Point Cloud
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference

 ---
 title: DPT Depth Estimation + 3D
 emoji: ⚡
+short_description: Image to 3D with DPT + 3D Point Cloud
+colorFrom: yellow
 colorTo: red
+python_version: 3.10.13
 sdk: gradio
 sdk_version: 5.16.1
 app_file: app.py
+license: apache-2.0
+tags:
+- depth
+- 3d
+hf_oauth: true
+fullWidth: false
+thumbnail: >-
+  https://cdn-uploads.huggingface.co/production/uploads/6346595c9e5f0fe83fc60444/s0fQvcoiSBlH36AXpVwPi.png
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference

app.py CHANGED Viewed

@@ -12,7 +12,9 @@ from transformers import DPTForDepthEstimation, DPTImageProcessor
 image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
 model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
 def process_image(image_path, resized_width=800, z_scale=208):
     """
     Processes the input image to generate a depth map and a 3D mesh reconstruction.
@@ -47,11 +49,14 @@ def process_image(image_path, resized_width=800, z_scale=208):
         predicted_depth.unsqueeze(1),
         size=(image.height, image.width),
         mode="bicubic",
-        align_corners=True,
     ).squeeze()
     # Normalize the depth image to 8-bit
-    prediction = prediction.cpu().numpy()
     depth_min, depth_max = prediction.min(), prediction.max()
     depth_image = ((prediction - depth_min) / (depth_max - depth_min) * 255).astype("uint8")
@@ -61,9 +66,13 @@ def process_image(image_path, resized_width=800, z_scale=208):
         gltf_path = create_3d_obj(np.array(image), prediction, image_path, depth=8, z_scale=z_scale)
     img = Image.fromarray(depth_image)
-    return [img, gltf_path, gltf_path]
 def create_3d_obj(rgb_image, raw_depth, image_path, depth=10, z_scale=200):
     """
     Creates a 3D object from RGB and depth images.
@@ -94,8 +103,8 @@ def create_3d_obj(rgb_image, raw_depth, image_path, depth=10, z_scale=200):
     camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(
         width,
         height,
-        fx=1.0,
-        fy=1.0,
         cx=width / 2.0,
         cy=height / 2.0,
     )
@@ -105,16 +114,16 @@ def create_3d_obj(rgb_image, raw_depth, image_path, depth=10, z_scale=200):
     # Scale the Z dimension
     points = np.asarray(pcd.points)
-    depth_scaled = ((raw_depth - raw_depth.min()) / (raw_depth.max() - raw_depth.min())) * z_scale
     z_values = depth_scaled.flatten()[:len(points)]
     points[:, 2] *= z_values
     pcd.points = o3d.utility.Vector3dVector(points)
     # Estimate and orient normals
     pcd.estimate_normals(
-        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30)
     )
-    pcd.orient_normals_towards_camera_location(camera_location=np.array([0.0, 0.0, 2.0 ]))
     # Apply transformations
     pcd.transform([[1, 0, 0, 0],
@@ -160,8 +169,8 @@ description = (
 )
 # Create Gradio sliders for resized_width and z_scale
 resized_width_slider = gr.Slider(
-    minimum=400,
-    maximum=1600,
     step=16,
     value=800,
     label="Resized Width",
@@ -169,15 +178,17 @@ resized_width_slider = gr.Slider(
 )
 z_scale_slider = gr.Slider(
-    minimum=160,
-    maximum=1024,
-    step=16,
-    value=208,
     label="Z-Scale",
     info="Adjust the scaling factor for the Z-axis in the 3D model."
 )
 examples = [["examples/" + img] for img in os.listdir("examples/")]
 iface = gr.Interface(
     fn=process_image,
         inputs=[
@@ -193,8 +204,11 @@ iface = gr.Interface(
     title=title,
     description=description,
     examples=examples,
     allow_flagging="never",
     cache_examples=False,
     theme="Surn/Beeuty"
 )

 image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
 model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
+import spaces
+@spaces.GPU(duration=90,progress=gr.Progress(track_tqdm=True))
 def process_image(image_path, resized_width=800, z_scale=208):
     """
     Processes the input image to generate a depth map and a 3D mesh reconstruction.
         predicted_depth.unsqueeze(1),
         size=(image.height, image.width),
         mode="bicubic",
+        align_corners=False,
     ).squeeze()
     # Normalize the depth image to 8-bit
+    if torch.cuda.is_available():
+        prediction = prediction.numpy()
+    else:
+        prediction = prediction.cpu().numpy()
     depth_min, depth_max = prediction.min(), prediction.max()
     depth_image = ((prediction - depth_min) / (depth_max - depth_min) * 255).astype("uint8")
         gltf_path = create_3d_obj(np.array(image), prediction, image_path, depth=8, z_scale=z_scale)
     img = Image.fromarray(depth_image)
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        torch.cuda.ipc_collect()
+    return [img, gltf_path, gltf_path]
+@spaces.GPU()
 def create_3d_obj(rgb_image, raw_depth, image_path, depth=10, z_scale=200):
     """
     Creates a 3D object from RGB and depth images.
     camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(
         width,
         height,
+        fx=z_scale,
+        fy=z_scale,
         cx=width / 2.0,
         cy=height / 2.0,
     )
     # Scale the Z dimension
     points = np.asarray(pcd.points)
+    depth_scaled = ((raw_depth - raw_depth.min()) / (raw_depth.max() - raw_depth.min())) * (z_scale*100)
     z_values = depth_scaled.flatten()[:len(points)]
     points[:, 2] *= z_values
     pcd.points = o3d.utility.Vector3dVector(points)
     # Estimate and orient normals
     pcd.estimate_normals(
+        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=60)
     )
+    pcd.orient_normals_towards_camera_location(camera_location=np.array([0.0, 0.0, 1.5 ]))
     # Apply transformations
     pcd.transform([[1, 0, 0, 0],
 )
 # Create Gradio sliders for resized_width and z_scale
 resized_width_slider = gr.Slider(
+    minimum=256,
+    maximum=1760,
     step=16,
     value=800,
     label="Resized Width",
 )
 z_scale_slider = gr.Slider(
+    minimum=0.2,
+    maximum=3.0,
+    step=0.01,
+    value=0.5,
     label="Z-Scale",
     info="Adjust the scaling factor for the Z-axis in the 3D model."
 )
 examples = [["examples/" + img] for img in os.listdir("examples/")]
+process_image.zerogpu = True
+gr.set_static_paths(paths=["models/","examples/"])
 iface = gr.Interface(
     fn=process_image,
         inputs=[
     title=title,
     description=description,
     examples=examples,
+    examples_per_page=15,
+    flagging_mode=None,
     allow_flagging="never",
     cache_examples=False,
+    delete_cache=(86400,86400),
     theme="Surn/Beeuty"
 )

pre-requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ pip>=25.0.1

requirements.txt CHANGED Viewed

@@ -1,7 +1,11 @@
-torch
-transformers
 numpy
-Pillow
-gradio>=5.16.0
 jinja2
-open3d

+git+https://github.com/huggingface/diffusers.git
+git+https://github.com/huggingface/transformers.git
+safetensors
+sentencepiece
+git+https://github.com/huggingface/peft.git
 numpy
+Pillow>=11.1.0
+torch>=2.4.1
 jinja2
+open3d
+spaces

web-ui.bat CHANGED Viewed

	@@ -1,2 +1,2 @@
1	- python311 -m app.py
2	pause


1	+ python311 -m app
2	pause