如何在 JavaScript 中将 COCO RLE 二进制掩码解码为图像?
P粉709307865
P粉709307865 2023-12-07 10:08:08
0
1
686

这是 COCO RLE 掩码的示例 - https://pastebin.com/ZhE2en4C

这是 YOLOv8 验证运行的输出,取自生成的 Predictions.json 文件。

我正在尝试在 JavaScript 中解码该字符串并将其呈现在画布上。编码的字符串是有效的,因为在 python 中我可以这样做:

from pycocotools import mask as coco_mask
from PIL import Image

example_prediction = {
    "image_id": "102_jpg",
    "category_id": 0,
    "bbox": [153.106, 281.433, 302.518, 130.737],
    "score": 0.8483,
    "segmentation": {
      "size": [640, 640],
      "counts": "<RLE string here>"
    }
  }

def rle_to_bitmap(rle):
  bitmap = coco_mask.decode(rle)
  return bitmap

def show_bitmap(bitmap):
  img = Image.fromarray(bitmap.astype(np.uint8) * 255, mode='L')
  img.show()
  input("Press Enter to continue...")
  img.close()
    

mask_bitmap = rle_to_bitmap(example_prediction["segmentation"])
show_bitmap(mask_bitmap)

我可以看到解码后的掩码。

是否有一个库可以用来解码 JavaScript 中的相同字符串并将其转换为 Image?我尝试深入研究 pycocotools 的源代码,但我做不到。

P粉709307865
P粉709307865

全部回复(1)
P粉024986150

您可以在画布上绘制蒙版,然后根据需要导出图像。

对于实际绘图,您可以使用两种方法:

  1. 将 RLE 解码为二进制掩码(二维矩阵或展平矩阵),然后根据该掩码绘制像素
  2. 直接从虚拟画布上的 RLE 字符串绘制蒙版,然后将其旋转 90 度并水平翻转

以下是两者的示例:

// Styling and scaling just for demo
let wrapper = document.createElement("div")
wrapper.style.cssText = `
  transform-origin: left top;
  transform: scale(8);
`
document.body.style.cssText = `
  background-color: #121212;
  margin: 0;
  overflow: hidden;
`
document.body.appendChild(wrapper)

// Helpers
function createCanvas(width, height) {
  let canvas = document.createElement("canvas")

  canvas.style.cssText = `
    border: 1px solid white;
    display: block;
    float: left;
    image-rendering: pixelated;
  `
  canvas.height = height
  canvas.width = width

  // Comment this line if you need only image sources
  wrapper.appendChild(canvas)

  return canvas
}

function randomColorRGBA() {
  return [
        Math.round(Math.random() * 255),
        Math.round(Math.random() * 255),
        Math.round(Math.random() * 255),
        255
      ]
}

// Fast array flattening (faster than Array.proto.flat())
function flatten(arr) {
  const flattened = []

  !(function flat(arr) {
    arr.forEach((el) => {
      if (Array.isArray(el)) flat(el)
      else flattened.push(el)
    })
  })(arr)

  return flattened
}

// Decode from RLE to Binary Mask
// (pass false to flat argument if you need 2d matrix output)
function decodeCocoRLE([rows, cols], counts, flat = true) {
  let pixelPosition = 0,
      binaryMask
  
  if (flat) {
    binaryMask = Array(rows * cols).fill(0)
  } else {
    binaryMask = Array.from({length: rows}, (_) => Array(cols).fill(0))
  }

  for (let i = 0, rleLength = counts.length; i  0) {
      const rowIndex = pixelPosition % rows,
            colIndex = (pixelPosition - rowIndex) / rows

      if (flat) {
        const arrayIndex = rowIndex * cols + colIndex
        binaryMask[arrayIndex] = 1
      } else {
        binaryMask[rowIndex][colIndex] = 1
      }

      pixelPosition++
      ones--
    }
  }

  if (!flat) {
    console.log("Result matrix:")
    binaryMask.forEach((row, i) => console.log(row.join(" "), `- row ${i}`))
  }

  return binaryMask
}

// 1. Draw from binary mask
function drawFromBinaryMask({size, counts}) {
  let fillColor = randomColorRGBA(),
      height = size[0],
      width = size[1]

  let canvas = createCanvas(width, height),
      canvasCtx = canvas.getContext("2d"),
      imgData = canvasCtx.getImageData(0, 0, width, height),
      pixelData = imgData.data

  // If you need matrix output (flat = false)
  // let maskFlattened = flatten(decodeCocoRLE(size, counts, false)),
  //     maskLength = maskFlattened.length;
  
  // If not - it's better to use faster approach
  let maskFlattened = decodeCocoRLE(size, counts),
      maskLength = maskFlattened.length;

  for(let i = 0; i  {
    end = start + interval * 4
    if (isOnesInterval) {
      for (let i = start; i  {
  wrapper.appendChild(image1)
}
image2.onload = () => {
  wrapper.appendChild(image2)
}

image1.src = imageSrc1
image2.src = imageSrc2
热门教程
更多>
最新下载
更多>
网站特效
网站源码
网站素材
前端模板