diff --git a/cmd/main.go b/cmd/main.go index f81c1692..1c29ad13 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -1,35 +1,41 @@ -/* - * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -package main - -import "kubevirt-gpu-device-plugin/pkg/device_plugin" - -func main() { - device_plugin.InitiateDevicePlugin() -} +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package main + +import ( + "flag" + "kubevirt-gpu-device-plugin/pkg/device_plugin" +) + +func main() { + flag.BoolVar(&device_plugin.RegisterAll, "register-all", false, + "Allow the device plugin to register multiple identical devices sharing the same IOMMU group. WARNING: Devices that belong to the same IOMMU group cannot be passed through to different VMs.") + flag.Parse() + device_plugin.InitiateDevicePlugin() +} diff --git a/pkg/device_plugin/device_plugin.go b/pkg/device_plugin/device_plugin.go index 1716ab83..cda2e7a2 100644 --- a/pkg/device_plugin/device_plugin.go +++ b/pkg/device_plugin/device_plugin.go @@ -42,7 +42,8 @@ import ( ) const ( - nvidiaVendorID = "10de" + nvidiaVendorID = "10de" + deviceIDSeparator = "|" ) // Structure to hold details about Nvidia GPU Device @@ -76,6 +77,9 @@ var readGpuIDForVgpu = readGpuIDForVgpuFunc var startVgpuDevicePlugin = startVgpuDevicePluginFunc var stop = make(chan struct{}) +// RegisterAll is set by the --register-all CLI flag +var RegisterAll bool + func InitiateDevicePlugin() { //Identifies GPUs and represents it in appropriate structures createIommuDeviceMap() @@ -183,7 +187,7 @@ func createIommuDeviceMap() { } //Nvidia vendor id is "10de". Proceed if vendor id is 10de - if vendorID == "10de" { + if vendorID == nvidiaVendorID { log.Println("Nvidia device ", info.Name()) //Retrieve iommu group for the device driver, err := readLink(basePath, info.Name(), "driver") @@ -198,17 +202,21 @@ func createIommuDeviceMap() { return nil } log.Println("Iommu Group " + iommuGroup) - _, exists := iommuMap[iommuGroup] + pciID := iommuGroup + if RegisterAll { + pciID = strings.Join([]string{iommuGroup, info.Name()}, deviceIDSeparator) + } + _, exists := iommuMap[pciID] if !exists { deviceID, err := readIDFromFile(basePath, info.Name(), "device") if err != nil { - log.Println("Could get deviceID for PCI address ", info.Name()) + log.Println("Could not get deviceID for PCI address ", info.Name()) return nil } log.Printf("Device Id %s", deviceID) - deviceMap[deviceID] = append(deviceMap[deviceID], iommuGroup) + deviceMap[deviceID] = append(deviceMap[deviceID], pciID) } - iommuMap[iommuGroup] = append(iommuMap[iommuGroup], NvidiaGpuDevice{info.Name()}) + iommuMap[pciID] = append(iommuMap[pciID], NvidiaGpuDevice{info.Name()}) } } return nil diff --git a/pkg/device_plugin/device_plugin_test.go b/pkg/device_plugin/device_plugin_test.go index a55d40f4..4d63c708 100644 --- a/pkg/device_plugin/device_plugin_test.go +++ b/pkg/device_plugin/device_plugin_test.go @@ -269,7 +269,7 @@ var _ = Describe("Device Plugin", func() { }) - It("", func() { + It("When RegisterAll set to default false", func() { readLink = getFakeLinkDevicePlugin readIDFromFile = getFakeIDFromFileDevicePlugin startDevicePlugin = fakeStartDevicePluginFunc @@ -285,6 +285,23 @@ var _ = Describe("Device Plugin", func() { stop <- struct{}{} }) + It("When RegisterAll = true", func() { + readLink = getFakeLinkDevicePlugin + readIDFromFile = getFakeIDFromFileDevicePlugin + startDevicePlugin = fakeStartDevicePluginFunc + RegisterAll = true + createIommuDeviceMap() + + iommuList := iommuMap["io_1|1"] + Expect(iommuList[0].addr).To(Equal("1")) + deviceList := deviceMap["1b80"] + Expect(deviceList[0]).To(Equal("io_1|1")) + + go createDevicePlugins() + time.Sleep(3 * time.Second) + stop <- struct{}{} + + }) }) Context("createVgpuIDMap() Tests", func() { diff --git a/pkg/device_plugin/generic_device_plugin.go b/pkg/device_plugin/generic_device_plugin.go index 5421e6d3..a3782e42 100644 --- a/pkg/device_plugin/generic_device_plugin.go +++ b/pkg/device_plugin/generic_device_plugin.go @@ -261,12 +261,16 @@ func (dpi *GenericDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.Al } for _, req := range reqs.ContainerRequests { deviceSpecs := make([]*pluginapi.DeviceSpec, 0) - for _, iommuId := range req.DevicesIDs { + for _, pciId := range req.DevicesIDs { devAddrs := []string{} + iommuId := pciId + if RegisterAll { + iommuId = strings.Split(pciId, deviceIDSeparator)[0] + } returnedMap := returnIommuMap() - //Retrieve the devices associated with a Iommu group - nvDev := returnedMap[iommuId] + //Retrieve the devices + nvDev := returnedMap[pciId] for _, dev := range nvDev { iommuGroup, err := readLink(basePath, dev.addr, "iommu_group") if err != nil || iommuGroup != iommuId { @@ -391,6 +395,9 @@ func (dpi *GenericDevicePlugin) healthCheck() error { for _, dev := range dpi.devs { devicePath := filepath.Join(path, dev.ID) + if RegisterAll { + devicePath = filepath.Join(path, strings.Split(dev.ID, deviceIDSeparator)[0]) + } err = watcher.Add(devicePath) log.Printf(" Adding Watcher to Path : %v", devicePath) pathDeviceMap[devicePath] = dev.ID