Instructions to use unity/inference-engine-yolo with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- unity-sentis
How to use unity/inference-engine-yolo with unity-sentis:
string modelName = "[Your model name here].sentis"; Model model = ModelLoader.Load(Application.streamingAssetsPath + "/" + modelName); IWorker engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model); // Please see provided C# file for more details
- Notebooks
- Google Colab
- Kaggle
| using System; | |
| using System.Collections.Generic; | |
| using System.IO; | |
| using Unity.InferenceEngine; | |
| using UnityEngine; | |
| using UnityEngine.UI; | |
| using UnityEngine.Video; | |
| /* | |
| * YOLO Inference Script | |
| * ======================== | |
| * | |
| * Place this script on the Main Camera and set the script parameters according to the tooltips. | |
| * | |
| */ | |
| public class RunYOLO : MonoBehaviour | |
| { | |
| [] | |
| public ModelAsset modelAsset; | |
| [] | |
| public TextAsset classesAsset; | |
| [] | |
| public RawImage displayImage; | |
| [] | |
| public Texture2D borderTexture; | |
| [] | |
| public Font font; | |
| [] | |
| public string videoFilename = "giraffes.mp4"; | |
| const BackendType backend = BackendType.GPUCompute; | |
| private Transform displayLocation; | |
| private Worker worker; | |
| private string[] labels; | |
| private RenderTexture targetRT; | |
| private Sprite borderSprite; | |
| //Image size for the model | |
| private const int imageWidth = 640; | |
| private const int imageHeight = 640; | |
| private VideoPlayer video; | |
| List<GameObject> boxPool = new(); | |
| [] | |
| [] | |
| float iouThreshold = 0.5f; | |
| [] | |
| [] | |
| float scoreThreshold = 0.5f; | |
| Tensor<float> centersToCorners; | |
| //bounding box data | |
| public struct BoundingBox | |
| { | |
| public float centerX; | |
| public float centerY; | |
| public float width; | |
| public float height; | |
| public string label; | |
| } | |
| void Start() | |
| { | |
| Application.targetFrameRate = 60; | |
| Screen.orientation = ScreenOrientation.LandscapeLeft; | |
| //Parse neural net labels | |
| labels = classesAsset.text.Split('\n'); | |
| LoadModel(); | |
| targetRT = new RenderTexture(imageWidth, imageHeight, 0); | |
| //Create image to display video | |
| displayLocation = displayImage.transform; | |
| SetupInput(); | |
| borderSprite = Sprite.Create(borderTexture, new Rect(0, 0, borderTexture.width, borderTexture.height), new Vector2(borderTexture.width / 2, borderTexture.height / 2)); | |
| } | |
| void LoadModel() | |
| { | |
| //Load model | |
| var model1 = ModelLoader.Load(modelAsset); | |
| centersToCorners = new Tensor<float>(new TensorShape(4, 4), | |
| new float[] | |
| { | |
| 1, 0, 1, 0, | |
| 0, 1, 0, 1, | |
| -0.5f, 0, 0.5f, 0, | |
| 0, -0.5f, 0, 0.5f | |
| }); | |
| //Here we transform the output of the model1 by feeding it through a Non-Max-Suppression layer. | |
| var graph = new FunctionalGraph(); | |
| var inputs = graph.AddInputs(model1); | |
| var modelOutput = Functional.Forward(model1, inputs)[0]; //shape=(1,84,8400) | |
| var boxCoords = modelOutput[0, 0..4, ..].Transpose(0, 1); //shape=(8400,4) | |
| var allScores = modelOutput[0, 4.., ..]; //shape=(80,8400) | |
| var scores = Functional.ReduceMax(allScores, 0); //shape=(8400) | |
| var classIDs = Functional.ArgMax(allScores, 0); //shape=(8400) | |
| var boxCorners = Functional.MatMul(boxCoords, Functional.Constant(centersToCorners)); //shape=(8400,4) | |
| var indices = Functional.NMS(boxCorners, scores, iouThreshold, scoreThreshold); //shape=(N) | |
| var coords = Functional.IndexSelect(boxCoords, 0, indices); //shape=(N,4) | |
| var labelIDs = Functional.IndexSelect(classIDs, 0, indices); //shape=(N) | |
| //Create worker to run model | |
| worker = new Worker(graph.Compile(coords, labelIDs), backend); | |
| } | |
| void SetupInput() | |
| { | |
| video = gameObject.AddComponent<VideoPlayer>(); | |
| video.renderMode = VideoRenderMode.APIOnly; | |
| video.source = VideoSource.Url; | |
| video.url = Path.Join(Application.streamingAssetsPath, videoFilename); | |
| video.isLooping = true; | |
| video.Play(); | |
| } | |
| private void Update() | |
| { | |
| ExecuteML(); | |
| if (Input.GetKeyDown(KeyCode.Escape)) | |
| { | |
| Application.Quit(); | |
| } | |
| } | |
| public void ExecuteML() | |
| { | |
| ClearAnnotations(); | |
| if (video && video.texture) | |
| { | |
| float aspect = video.width * 1f / video.height; | |
| Graphics.Blit(video.texture, targetRT, new Vector2(1f / aspect, 1), new Vector2(0, 0)); | |
| displayImage.texture = targetRT; | |
| } | |
| else return; | |
| using Tensor<float> inputTensor = new Tensor<float>(new TensorShape(1, 3, imageHeight, imageWidth)); | |
| TextureConverter.ToTensor(targetRT, inputTensor, default); | |
| worker.Schedule(inputTensor); | |
| using var output = (worker.PeekOutput("output_0") as Tensor<float>).ReadbackAndClone(); | |
| using var labelIDs = (worker.PeekOutput("output_1") as Tensor<int>).ReadbackAndClone(); | |
| float displayWidth = displayImage.rectTransform.rect.width; | |
| float displayHeight = displayImage.rectTransform.rect.height; | |
| float scaleX = displayWidth / imageWidth; | |
| float scaleY = displayHeight / imageHeight; | |
| int boxesFound = output.shape[0]; | |
| //Draw the bounding boxes | |
| for (int n = 0; n < Mathf.Min(boxesFound, 200); n++) | |
| { | |
| var box = new BoundingBox | |
| { | |
| centerX = output[n, 0] * scaleX - displayWidth / 2, | |
| centerY = output[n, 1] * scaleY - displayHeight / 2, | |
| width = output[n, 2] * scaleX, | |
| height = output[n, 3] * scaleY, | |
| label = labels[labelIDs[n]], | |
| }; | |
| DrawBox(box, n, displayHeight * 0.05f); | |
| } | |
| } | |
| public void DrawBox(BoundingBox box, int id, float fontSize) | |
| { | |
| //Create the bounding box graphic or get from pool | |
| GameObject panel; | |
| if (id < boxPool.Count) | |
| { | |
| panel = boxPool[id]; | |
| panel.SetActive(true); | |
| } | |
| else | |
| { | |
| panel = CreateNewBox(Color.yellow); | |
| } | |
| //Set box position | |
| panel.transform.localPosition = new Vector3(box.centerX, -box.centerY); | |
| //Set box size | |
| RectTransform rt = panel.GetComponent<RectTransform>(); | |
| rt.sizeDelta = new Vector2(box.width, box.height); | |
| //Set label text | |
| var label = panel.GetComponentInChildren<Text>(); | |
| label.text = box.label; | |
| label.fontSize = (int)fontSize; | |
| } | |
| public GameObject CreateNewBox(Color color) | |
| { | |
| //Create the box and set image | |
| var panel = new GameObject("ObjectBox"); | |
| panel.AddComponent<CanvasRenderer>(); | |
| Image img = panel.AddComponent<Image>(); | |
| img.color = color; | |
| img.sprite = borderSprite; | |
| img.type = Image.Type.Sliced; | |
| panel.transform.SetParent(displayLocation, false); | |
| //Create the label | |
| var text = new GameObject("ObjectLabel"); | |
| text.AddComponent<CanvasRenderer>(); | |
| text.transform.SetParent(panel.transform, false); | |
| Text txt = text.AddComponent<Text>(); | |
| txt.font = font; | |
| txt.color = color; | |
| txt.fontSize = 40; | |
| txt.horizontalOverflow = HorizontalWrapMode.Overflow; | |
| RectTransform rt2 = text.GetComponent<RectTransform>(); | |
| rt2.offsetMin = new Vector2(20, rt2.offsetMin.y); | |
| rt2.offsetMax = new Vector2(0, rt2.offsetMax.y); | |
| rt2.offsetMin = new Vector2(rt2.offsetMin.x, 0); | |
| rt2.offsetMax = new Vector2(rt2.offsetMax.x, 30); | |
| rt2.anchorMin = new Vector2(0, 0); | |
| rt2.anchorMax = new Vector2(1, 1); | |
| boxPool.Add(panel); | |
| return panel; | |
| } | |
| public void ClearAnnotations() | |
| { | |
| foreach (var box in boxPool) | |
| { | |
| box.SetActive(false); | |
| } | |
| } | |
| void OnDestroy() | |
| { | |
| centersToCorners?.Dispose(); | |
| worker?.Dispose(); | |
| } | |
| } | |