浏览代码

Issue #399: Subnet manager container for infiniband fabric

Signed-off-by: sakshiarora13 <sakshi_arora1@dell.com>
Lucas A. Wilson 3 年之前
父节点
当前提交
c0590259bf

+ 1 - 1
.metadata/omnia_version

@@ -1 +1 @@
-Omnia version 1.0.3
+Omnia version devel

+ 22 - 1
control_plane/input_params/ib_vars.yml

@@ -11,4 +11,25 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
----
+
+
+# The default configuration below was written for:
+#   Switch-IB(TM) 2 based EDR InfiniBand 1U Switch, 36 QSFP28 ports
+# Adjust the configuration to match your switch model to avoid failures.
+
+# Subnet manager: the centralized resource used by InfiniBand
+# to handle the management of the fabric.
+# The cache and log directories of the subnet manager container
+# are mounted to the host paths configured below.
+
+# cache_directory: opensm stores certain data on disk so that
+# subsequent runs are consistent. The default directory used is /var/cache/opensm.
+# The following files are kept in it: guid2lid, guid2mkey, neighbors
+
+# log_directory: controls the directory in which the temporary files
+# generated by opensm are created. These files are: opensm-subnet.lst,
+# opensm.fdbs, and opensm.log. By default, this directory is /var/log.
+
+subnet_manager:
+  cache_directory: /var/cache/opensm
+  log_directory: /var/log

+ 665 - 0
control_plane/input_params/opensm.conf

@@ -0,0 +1,665 @@
+#
+# DEVICE ATTRIBUTES OPTIONS
+#
+# The port GUID on which the OpenSM is running
+guid 0x0000000000000000
+
+# M_Key value sent to all ports qualifying all Set(PortInfo)
+m_key 0x0000000000000000
+
+# The lease period used for the M_Key on this subnet in [sec]
+m_key_lease_period 0
+
+# The protection level used for the M_Key on this subnet
+m_key_protection_level 0
+
+# If TRUE, SM tries to determine the m_key of unknown ports from guid2mkey file
+# If FALSE, SM won't try to determine the m_key of unknown ports.
+# Preconfigured m_key will be used instead
+m_key_lookup TRUE
+
+# SM_Key value of the SM used for SM authentication
+sm_key 0x0000000000000001
+
+# SM_Key value to qualify rcv SA queries as 'trusted'
+sa_key 0x0000000000000001
+
+# Note that for both values above (sm_key and sa_key)
+# OpenSM version 3.2.1 and below used the default value '1'
+# in a host byte order, it is fixed now but you may need to
+# change the values to interoperate with old OpenSM running
+# on a little endian machine.
+
+# Subnet prefix used on this subnet
+subnet_prefix 0xfe80000000000000
+
+# The LMC value used on this subnet
+lmc 0
+
+# lmc_esp0 determines whether LMC value used on subnet is used for
+# enhanced switch port 0. If TRUE, LMC value for subnet is used for
+# ESP0. Otherwise, LMC value for ESP0s is 0.
+lmc_esp0 FALSE
+
+# sm_sl determines SMSL used for SM/SA communication
+sm_sl 0
+
+# The code of maximal time a packet can live in a switch
+# The actual time is 4.096usec * 2^<packet_life_time>
+# The value 0x14 disables this mechanism
+packet_life_time 0x12
+
+# The number of sequential packets dropped that cause the port
+# to enter the VLStalled state. The result of setting this value to
+# zero is undefined.
+vl_stall_count 0x07
+
+# The number of sequential packets dropped that cause the port
+# to enter the VLStalled state. This value is for switch ports
+# driving a CA or router port. The result of setting this value
+# to zero is undefined.
+leaf_vl_stall_count 0x07
+
+# The code of maximal time a packet can wait at the head of
+# transmission queue.
+# The actual time is 4.096usec * 2^<head_of_queue_lifetime>
+# The value 0x14 disables this mechanism
+head_of_queue_lifetime 0x12
+
+# The maximal time a packet can wait at the head of queue on
+# switch port connected to a CA or router port
+leaf_head_of_queue_lifetime 0x10
+
+# Limit the maximal operational VLs
+max_op_vls 5
+
+# Force PortInfo:LinkSpeedEnabled on switch ports
+# If 0, don't modify PortInfo:LinkSpeedEnabled on switch port
+# Otherwise, use value for PortInfo:LinkSpeedEnabled on switch port
+# Values are (IB Spec 1.2.1, 14.2.5.6 Table 146 "PortInfo")
+#    1: 2.5 Gbps
+#    3: 2.5 or 5.0 Gbps
+#    5: 2.5 or 10.0 Gbps
+#    7: 2.5 or 5.0 or 10.0 Gbps
+#    2,4,6,8-14 Reserved
+#    Default 15: set to PortInfo:LinkSpeedSupported
+force_link_speed 15
+
+# Force PortInfo:LinkSpeedExtEnabled on ports
+# If 0, don't modify PortInfo:LinkSpeedExtEnabled on port
+# Otherwise, use value for PortInfo:LinkSpeedExtEnabled on port
+# Values are (MgtWG RefIDs #4722 and #9366)
+#    1: 14.0625 Gbps
+#    2: 25.78125 Gbps
+#    3: 14.0625 Gbps or 25.78125 Gbps
+#    4: 53.125 Gbps
+#    5: 14.0625 Gbps or 53.125 Gbps
+#    6: 25.78125 Gbps or 53.125 Gbps
+#    7: 14.0625 Gbps, 25.78125 Gbps or 53.125 Gbps
+#    30: Disable extended link speeds
+#    Default 31: set to PortInfo:LinkSpeedExtSupported
+force_link_speed_ext 31
+
+# Force PortInfo:LinkWidthEnabled on switch ports
+# If 0, don't modify PortInfo:LinkWidthEnabled on switch port
+# Otherwise, use value for PortInfo:LinkWidthEnabled on switch port
+# Values are (IB Spec 1.2.1, 14.2.5.6 Table 146 "PortInfo"
+# augmented by MgtWG RefIDs #9306-9309)
+#    1: 1x
+#    2: 4x
+#    3: 1x or 4x
+#    4: 8x
+#    5: 1x or 8x
+#    6: 4x or 8x
+#    7: 1x or 4x or 8x
+#    8: 12x
+#    9: 1x or 12x
+#    10: 4x or 12x
+#    11: 1x or 4x or 12x
+#    12: 8x or 12x
+#    13: 1x or 8x or 12x
+#    14: 4x or 8x or 12x
+#    15: 1x or 4x or 8x or 12x
+#    16: 2x
+#    17: 1x or 2x
+#    18: 2x or 4x
+#    19: 1x or 2x or 4x
+#    20: 2x or 8x
+#    21: 1x or 2x or 8x
+#    22: 2x or 4x or 8x
+#    23: 1x or 2x or 4x or 8x
+#    24: 2x or 12x
+#    25: 1x or 2x or 12x
+#    26: 2x or 4x or 12x
+#    27: 1x or 2x or 4x or 12x
+#    28: 2x or 8x or 12x
+#    29: 1x or 2x or 8x or 12x
+#    30: 2x or 4x or 8x or 12x
+#    31: 1x or 2x or 4x or 8x or 12x
+#    32-254 Reserved
+#    Default 255: set to PortInfo:LinkWidthSupported
+force_link_width 255
+
+# FDR10 on ports on devices that support FDR10
+# Values are:
+#    0: don't use fdr10 (no MLNX ExtendedPortInfo MADs)
+#    Default 1: enable fdr10 when supported
+#    2: disable fdr10 when supported
+fdr10 1
+
+# The subnet_timeout code that will be set for all the ports
+# The actual timeout is 4.096usec * 2^<subnet_timeout>
+subnet_timeout 18
+
+# Threshold of local phy errors for sending Trap 129
+local_phy_errors_threshold 0x08
+
+# Threshold of credit overrun errors for sending Trap 130
+overrun_errors_threshold 0x08
+
+# Use SwitchInfo:MulticastFDBTop if advertised in PortInfo:CapabilityMask
+use_mfttop TRUE
+
+#
+# PARTITIONING OPTIONS
+#
+# Partition configuration file to be used
+partition_config_file /etc/rdma/partitions.conf
+
+# Disable partition enforcement by switches (DEPRECATED)
+# This option is DEPRECATED. Please use part_enforce instead
+no_partition_enforcement FALSE
+
+# Partition enforcement type (for switches)
+# Values are both, out, in and off
+# Default is both (outbound and inbound enforcement)
+part_enforce both
+
+# Allow both full and limited membership on the same partition
+allow_both_pkeys FALSE
+
+# Keep current and take into account old pkey indexes
+# during calculation of physical ports pkey tables
+keep_pkey_indexes TRUE
+
+# SM assigned GUID byte where GUID is formed from OpenFabrics OUI
+# followed by 40 bits xy 00 ab cd ef where xy is the SM assigned GUID byte
+# and ab cd ef is an SM autogenerated 24 bits
+# SM assigned GUID byte should be configured as subnet unique
+sm_assigned_guid 0x00
+
+#
+# SWEEP OPTIONS
+#
+# The number of seconds between subnet sweeps (0 disables it)
+sweep_interval 10
+
+# If TRUE cause all lids to be reassigned
+reassign_lids FALSE
+
+# If TRUE forces every sweep to be a heavy sweep
+force_heavy_sweep FALSE
+
+# If TRUE every trap 128 and 144 will cause a heavy sweep.
+# NOTE: successive identical traps (>10) are suppressed
+sweep_on_trap TRUE
+
+#
+# ROUTING OPTIONS
+#
+# If TRUE count switches as link subscriptions
+port_profile_switch_nodes FALSE
+
+# Name of file with port guids to be ignored by port profiling
+port_prof_ignore_file (null)
+
+# The file holding routing weighting factors per output port
+hop_weights_file (null)
+
+# The file holding non-default port order per switch for routing
+port_search_ordering_file (null)
+
+# Routing engine
+# Multiple routing engines can be specified separated by
+# commas so that specific ordering of routing algorithms will
+# be tried if earlier routing engines fail.
+# Supported engines: minhop, updn, dnup, file, ftree, lash,
+#    dor, torus-2QoS, nue, dfsssp, sssp
+routing_engine (null)
+
+# Routing engines will avoid throttled switch-to-switch links
+# (supported by: nue, dfsssp, sssp; use FALSE if unsure)
+avoid_throttled_links FALSE
+
+# Connect roots (use FALSE if unsure)
+connect_roots FALSE
+
+# Use unicast routing cache (use FALSE if unsure)
+use_ucast_cache FALSE
+
+# Lid matrix dump file name
+lid_matrix_dump_file (null)
+
+# LFTs file name
+lfts_file (null)
+
+# The file holding the root node guids (for fat-tree or Up/Down)
+# One guid in each line
+root_guid_file (null)
+
+# The file holding the fat-tree compute node guids
+# One guid in each line
+cn_guid_file (null)
+
+# The file holding the fat-tree I/O node guids
+# One guid in each line.
+# If only io_guid file is provided, the rest of nodes
+# are considered as compute nodes.
+io_guid_file (null)
+
+# If TRUE enables alternative indexing policy for ftree routing
+# in quasi-ftree topologies that can improve shift-pattern support.
+# The switch indexing starts from root switch and leaf switches
+# are termination points of BFS algorithm
+# If FALSE, the indexing starts from leaf switch (default)
+quasi_ftree_indexing FALSE
+
+# Number of reverse hops allowed for I/O nodes
+# Used for connectivity between I/O nodes connected to Top Switches
+max_reverse_hops 0
+
+# The file holding the node ids which will be used by Up/Down algorithm instead
+# of GUIDs (one guid and id in each line)
+ids_guid_file (null)
+
+# The file holding guid routing order guids (for MinHop and Up/Down)
+guid_routing_order_file (null)
+
+# Do mesh topology analysis (for LASH algorithm)
+do_mesh_analysis FALSE
+
+# Starting VL for LASH algorithm
+lash_start_vl 0
+
+# Maximum number of VLs for Nue routing algorithm (default: 1; to enforce
+# deadlock-freedom even if QoS is not enabled). Set to 0 if Nue should
+# automatically determine and choose maximum supported by the fabric, or
+# any integer >= 1 (then Nue uses min(max_supported,nue_max_num_vls))
+nue_max_num_vls 1
+
+# If TRUE, then Nue assumes that switches will send/receive
+# data traffic, too, and hence their paths are included in
+# the deadlock-avoidance calculation (use FALSE if unsure)
+nue_include_switches FALSE
+
+# Port Shifting (use FALSE if unsure)
+port_shifting FALSE
+
+# Assign ports in a random order instead of round-robin
+# If zero disable (default), otherwise use the value as a random seed
+scatter_ports 0
+
+# Don't use scatter for ports defined in
+# guid_routing_order file
+guid_routing_order_no_scatter FALSE
+
+# SA database file name
+sa_db_file (null)
+
+# If TRUE causes OpenSM to dump SA database at the end of
+# every light sweep, regardless of the verbosity level
+sa_db_dump FALSE
+
+# Torus-2QoS configuration file name
+torus_config /etc/rdma/torus-2QoS.conf
+
+#
+# HANDOVER - MULTIPLE SMs OPTIONS
+#
+# SM priority used for deciding who is the master
+# Range goes from 0 (lowest priority) to 15 (highest).
+sm_priority 0
+
+# If TRUE other SMs on the subnet should be ignored
+ignore_other_sm FALSE
+
+# Timeout in [msec] between two polls of active master SM
+sminfo_polling_timeout 10000
+
+# Number of failing polls of remote SM that declares it dead
+polling_retry_number 4
+
+# If TRUE honor the guid2lid file when coming out of standby
+# state, if such file exists and is valid
+honor_guid2lid_file FALSE
+
+#
+# TIMING AND THREADING OPTIONS
+#
+# Maximum number of SMPs sent in parallel
+max_wire_smps 4
+
+# Maximum number of timeout based SMPs allowed to be outstanding
+# A value less than or equal to max_wire_smps disables this mechanism
+max_wire_smps2 4
+
+# The timeout in [usec] used for sending SMPs above max_wire_smps limit
+# and below max_wire_smps2 limit
+max_smps_timeout 600000
+
+# The maximum time in [msec] allowed for a transaction to complete
+transaction_timeout 200
+
+# The maximum number of retries allowed for a transaction to complete
+transaction_retries 3
+
+# The maximum time in [msec] allowed for a "long" transaction to complete
+# Currently, long transaction is only set of optimized SL2VLMappingTable
+long_transaction_timeout 500
+
+# Maximal time in [msec] a message can stay in the incoming message queue.
+# If there is more than one message in the queue and the last message
+# stayed in the queue more than this value, any SA request will be
+# immediately dropped, but BUSY status is not currently returned.
+max_msg_fifo_timeout 10000
+
+# Use a single thread for handling SA queries
+single_thread FALSE
+
+#
+# MISC OPTIONS
+#
+# Daemon mode
+daemon FALSE
+
+# SM Inactive
+sm_inactive FALSE
+
+# Babbling Port Policy
+babbling_port_policy FALSE
+
+# Drop event subscriptions (InformInfo and ServiceRecord) on port removal and SM coming out of STANDBY
+drop_event_subscriptions FALSE
+
+# Validate IPoIB non-broadcast group creation parameters against
+# broadcast group parameters per IETF RFC 4391 (default TRUE)
+ipoib_mcgroup_creation_validation TRUE
+
+# Validate multicast join parameters against multicast group
+# parameters when MC group already exists
+mcgroup_join_validation TRUE
+
+# Use original extended SA rates only
+# The original extended SA rates are up through 300 Gbps (12x EDR)
+# Set to TRUE for subnets with old kernels/drivers that don't understand
+# the new SA rates for 2x link width and/or HDR link speed (19-22)
+# default is FALSE
+use_original_extended_sa_rates_only FALSE
+
+# Use Optimized SLtoVLMapping programming if supported by device
+use_optimized_slvl FALSE
+
+# Sync in memory files used for high availability with storage
+fsync_high_avail_files TRUE
+
+#
+# Performance Manager Options
+#
+# perfmgr enable
+# PerfMgr is enabled if TRUE and disabled if FALSE (default FALSE)
+perfmgr FALSE
+
+# redirection enable
+# Redirection supported if TRUE and not supported if FALSE (default TRUE)
+perfmgr_redir TRUE
+
+# sweep time in seconds (default 180 seconds)
+perfmgr_sweep_time_s 180
+
+# Max outstanding queries (default 500)
+perfmgr_max_outstanding_queries 500
+
+# Ignore CAs on sweep (default FALSE)
+perfmgr_ignore_cas FALSE
+
+# Remove missing nodes from DB (default TRUE)
+perfmgr_rm_nodes TRUE
+
+# Log error counters to opensm.log (default TRUE)
+perfmgr_log_errors TRUE
+
+# Query PerfMgt Get(ClassPortInfo) for extended capabilities
+# Extended capabilities include 64 bit extended counters
+# and transmit wait support (default TRUE)
+perfmgr_query_cpi TRUE
+
+# Log xmit_wait errors (default FALSE)
+perfmgr_xmit_wait_log FALSE
+
+# If logging xmit_wait's; set threshold (default 65535)
+perfmgr_xmit_wait_threshold 65535
+
+#
+# Event DB Options
+#
+# Dump file to dump the events to
+event_db_dump_file (null)
+
+#
+# Event Plugin Options
+#
+# Event plugin name(s)
+event_plugin_name (null)
+
+# Options string that would be passed to the plugin(s)
+event_plugin_options (null)
+
+#
+# Node name map for mapping nodes to more descriptive node descriptions
+# (man ibnetdiscover for more information)
+#
+node_name_map_name (null)
+
+#
+# DEBUG FEATURES
+#
+# The log flags used
+log_flags 0x03
+
+# Force flush of the log file after each log message
+force_log_flush FALSE
+
+# Log file to be used
+log_file /var/log/opensm.log
+
+# Limit the size of the log file in MB. If overrun, log is restarted
+log_max_size 0
+
+# If TRUE will accumulate the log over multiple OpenSM sessions
+accum_log_file TRUE
+
+# Per module logging configuration file
+# Each line in config file contains <module_name><separator><log_flags>
+# where module_name is file name including .c
+# separator is either = , space, or tab
+# log_flags is the same flags as used in the coarse/overall logging
+per_module_logging_file /etc/rdma/per-module-logging.conf
+
+# The directory to hold the file OpenSM dumps
+dump_files_dir /var/log/
+
+# If TRUE enables new high risk options and hardware specific quirks
+enable_quirks FALSE
+
+# If TRUE disables client reregistration
+no_clients_rereg FALSE
+
+# If TRUE OpenSM disables multicast support and
+# no multicast routing is performed
+disable_multicast FALSE
+
+# If TRUE opensm will exit on fatal initialization issues
+exit_on_fatal TRUE
+
+# console [off|local]
+console off
+
+# Telnet port for console (default 10000)
+console_port 10000
+
+#
+# QoS OPTIONS
+#
+# Enable QoS setup
+qos FALSE
+
+# QoS policy file to be used
+qos_policy_file /etc/rdma/qos-policy.conf
+
+# Suppress QoS MAD status errors
+suppress_sl2vl_mad_status_errors FALSE
+
+# QoS default options
+qos_max_vls 0
+qos_high_limit -1
+qos_vlarb_high (null)
+qos_vlarb_low (null)
+qos_sl2vl (null)
+
+# QoS CA options
+qos_ca_max_vls 0
+qos_ca_high_limit -1
+qos_ca_vlarb_high (null)
+qos_ca_vlarb_low (null)
+qos_ca_sl2vl (null)
+
+# QoS Switch Port 0 options
+qos_sw0_max_vls 0
+qos_sw0_high_limit -1
+qos_sw0_vlarb_high (null)
+qos_sw0_vlarb_low (null)
+qos_sw0_sl2vl (null)
+
+# QoS Switch external ports options
+qos_swe_max_vls 0
+qos_swe_high_limit -1
+qos_swe_vlarb_high (null)
+qos_swe_vlarb_low (null)
+qos_swe_sl2vl (null)
+
+# QoS Router ports options
+qos_rtr_max_vls 0
+qos_rtr_high_limit -1
+qos_rtr_vlarb_high (null)
+qos_rtr_vlarb_low (null)
+qos_rtr_sl2vl (null)
+
+#
+# Congestion Control OPTIONS (EXPERIMENTAL)
+#
+
+# Enable Congestion Control Configuration
+congestion_control FALSE
+
+# CCKey to use when configuring congestion control
+# note that this does not configure a new CCkey, only the CCkey to use
+cc_key 0x0000000000000000
+
+# Congestion Control Max outstanding MAD
+cc_max_outstanding_mads 500
+
+#
+# Congestion Control SwitchCongestionSetting options
+#
+# Control Map - bitmask indicating which of the following are to be used
+# bit 0 - victim mask
+# bit 1 - credit mask
+# bit 2 - threshold + packet size
+# bit 3 - credit starvation threshold + return delay valid
+# bit 4 - marking rate valid
+cc_sw_cong_setting_control_map 0x0
+
+# Victim Mask - 256 bit mask representing switch ports, mark packets with FECN
+# whether they are the source or victim of congestion
+# bit 0 - port 0 (enhanced port)
+# bit 1 - port 1
+# ...
+# bit 254 - port 254
+# bit 255 - reserved
+cc_sw_cong_setting_victim_mask 0x0000000000000000000000000000000000000000000000000000000000000000
+
+# Credit Mask - 256 bit mask representing switch ports to apply credit starvation
+# bit 0 - port 0 (enhanced port)
+# bit 1 - port 1
+# ...
+# bit 254 - port 254
+# bit 255 - reserved
+cc_sw_cong_setting_credit_mask 0x0000000000000000000000000000000000000000000000000000000000000000
+
+# Threshold - value indicating aggressiveness of congestion marking
+# 0x0 - none, 0x1 - loose, ..., 0xF - aggressive
+cc_sw_cong_setting_threshold 0x00
+
+# Packet Size - any packet less than this size will not be marked with a FECN
+# units are in credits
+cc_sw_cong_setting_packet_size 0
+
+# Credit Starvation Threshold - value indicating aggressiveness of credit starvation
+# 0x0 - none, 0x1 - loose, ..., 0xF - aggressive
+cc_sw_cong_setting_credit_starvation_threshold 0x00
+
+# Credit Starvation Return Delay - in CCT entry shift:multiplier format, see IB spec
+cc_sw_cong_setting_credit_starvation_return_delay 0:0
+
+# Marking Rate - mean number of packets between markings
+cc_sw_cong_setting_marking_rate 0
+
+#
+# Congestion Control CA Congestion Setting options
+#
+# Port Control
+# bit 0 = 0, QP based congestion control
+# bit 0 = 1, SL/port based congestion control
+cc_ca_cong_setting_port_control 0x0000
+
+# Control Map - 16 bit bitmask indicating which SLs should be configured
+cc_ca_cong_setting_control_map 0x0000
+
+#
+# CA Congestion Setting Entries
+#
+# Each of the congestion control settings below configures the CA Congestion
+# Settings for an individual SL.  The SL must be specified before the value.
+# These options may be specified multiple times to configure different values
+# for different SLs.
+#
+# ccti timer - when expires decrements 1 from the CCTI
+# ccti increase - number to be added to the table index on receipt of a BECN
+# trigger threshold - when the ccti is equal to this, an event is logged
+# ccti min - the minimum value for the ccti.  This imposes a minimum rate
+#            on the injection rate
+
+# SL = 0
+cc_ca_cong_setting_ccti_timer 0 0
+cc_ca_cong_setting_ccti_increase 0 0
+cc_ca_cong_setting_trigger_threshold 0 0
+cc_ca_cong_setting_ccti_min 0 0
+
+#
+# Congestion Control Table
+#
+# Comma separated list of CCT entries representing CCT.
+# Format is shift:multiplier,shift:multiplier,shift:multiplier,...
+cc_cct (null)
+
+# Prefix routes file name
+prefix_routes_file /etc/rdma/prefix-routes.conf
+
+#
+# IPv6 Solicited Node Multicast (SNM) Options
+#
+consolidate_ipv6_snm_req FALSE
+
+# Log prefix
+log_prefix (null)
+

+ 17 - 0
control_plane/roles/control_plane_sm/files/Dockerfile

@@ -0,0 +1,17 @@
+# Container image for the OpenSM InfiniBand subnet manager.
+# NOTE(review): CentOS Linux 8 appears to be end-of-life upstream; confirm
+# that the default dnf repositories are still reachable before rebuilding.
+FROM centos:8
+
+# Run all package operations in one layer and clean the dnf cache at the end
+# of that same layer, so cached package data is not baked into the image.
+# epel-release is installed by its own dnf call so that the repository it
+# adds is already configured for the install commands that follow.
+RUN dnf -y update \
+    && dnf install -y epel-release \
+    && dnf groupinstall -y "Infiniband Support" \
+    && dnf install -y opensm \
+    && dnf clean all
+
+# opensm.conf and start.sh are taken from the build context directory.
+COPY opensm.conf /etc/rdma/opensm.conf
+
+COPY start.sh /
+
+RUN chmod +x /start.sh
+
+ENTRYPOINT ["/start.sh"]

+ 43 - 0
control_plane/roles/control_plane_sm/files/k8s_sm.yml

@@ -0,0 +1,43 @@
+# Deployment that runs a single replica of the OpenSM subnet manager.
+# NOTE: tasks/create_pod.yml rewrites the container name, the image and the
+# two hostPath paths in this file using line-anchored regular expressions.
+# Keep the indentation and line order of those entries unchanged, and do not
+# insert lines between "containers:" and its "- name:" entry or inside the
+# "- name / hostPath / path" groups under "volumes:".
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: subnet-manager
+  namespace: subnet-manager
+  labels:
+    app: subnet-manager
+spec:
+  selector:
+    matchLabels:
+      app: subnet-manager
+  replicas: 1
+  strategy:
+    type: RollingUpdate
+  template:
+    metadata:
+      labels:
+        app: subnet-manager
+    spec:
+      # The pod uses the host's network namespace.
+      hostNetwork: true
+      containers:
+        - name: opensm-service
+          image: 'localhost/opensm-service:latest'
+          # Never pull: the image is expected to be present locally.
+          imagePullPolicy: Never
+          # Container-side paths; these match the cache directory described
+          # in ib_vars.yml and log_file/dump_files_dir in opensm.conf.
+          volumeMounts:
+            - mountPath: /var/cache/opensm
+              name: opensm-cache
+            - mountPath: /var/log
+              name: opensm-logs
+          resources:
+            limits:
+              memory: "10Gi"
+          # NOTE(review): privileged mode is presumably needed for access to
+          # the InfiniBand devices - confirm.
+          securityContext:
+            privileged: true
+      # Host directories backing the mounts above. With type "Directory" the
+      # path must already exist on the host.
+      volumes:
+        - name: opensm-cache
+          hostPath:
+            path: /var/cache/opensm
+            type: Directory
+        - name: opensm-logs
+          hostPath:
+            path: /var/log
+            type: Directory

+ 5 - 0
control_plane/roles/control_plane_sm/files/start.sh

@@ -0,0 +1,5 @@
+#!/bin/sh
+# Container entrypoint: run /usr/libexec/rdma-init-kernel, then replace this
+# shell with opensm, started with the configuration file given below.
+
+OPENSM_CONF=/etc/rdma/opensm.conf
+
+/usr/libexec/rdma-init-kernel
+
+exec /usr/sbin/opensm -F "${OPENSM_CONF}"

+ 28 - 0
control_plane/roles/control_plane_sm/tasks/create_image.yml

@@ -0,0 +1,28 @@
+# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+# Tasks that build the subnet manager container image with buildah
+
+# Read-only listing; its output decides whether the build below is needed.
+- name: Get buildah images
+  command:
+    cmd: buildah images
+  register: buildah_images
+  changed_when: false
+  tags: install
+
+# Skipped when the image name already appears in the buildah listing.
+- name: Subnet manager image creation (It may take 5-10 mins)
+  command:
+    cmd: >-
+      buildah bud
+      -t {{ sm_docker_image_name }}:{{ sm_docker_image_tag }}
+      --network host
+      -f {{ role_path }}/files/Dockerfile
+    chdir: "{{ role_path }}/files/"
+  when: sm_docker_image_name not in buildah_images.stdout
+  tags: install

+ 67 - 0
control_plane/roles/control_plane_sm/tasks/create_pod.yml

@@ -0,0 +1,67 @@
+# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+# tasks file for sm pod creation
+
+# Read-only listing used to decide whether the namespace must be created.
+- name: Get namespaces
+  command: kubectl get namespaces
+  changed_when: false
+  register: k8s_namespaces
+  tags: install
+
+- name: Ensure that namespace is created
+  command: kubectl create namespace subnet-manager
+  when: "'subnet-manager' not in k8s_namespaces.stdout"
+  tags: install
+
+# Read-only listing used to decide whether the pod must be created.
+- name: Get K8s pods
+  command: kubectl get pods --all-namespaces
+  changed_when: false
+  register: k8s_pods
+  tags: install
+
+# The manifest is patched in place and applied only when no entry containing
+# 'subnet-manager' shows up in the pod listing above.
+# The regular expressions below depend on the exact indentation and line
+# order of the manifest file referenced by sm_kube_config_file.
+- name: Create subnet manager pod
+  block:
+    - name: Replace container name in sm config file
+      replace:
+        path: "{{ sm_kube_config_file }}"
+        regexp: "      containers:\n        - name:.*"
+        replace: "      containers:\n        - name: {{ sm_container_name }}"
+      tags: install
+
+    - name: Replace image name in sm config file
+      replace:
+        path: "{{ sm_kube_config_file }}"
+        regexp: "          image:.*"
+        replace: "          image: 'localhost/{{ sm_docker_image_name }}:{{ sm_docker_image_tag }}'"
+      tags: install
+
+    # No trailing space after the path, so the patched manifest stays free of
+    # trailing whitespace.
+    - name: Replace cache directory in sm config file
+      replace:
+        path: "{{ sm_kube_config_file }}"
+        regexp: "        - name: opensm-cache\n          hostPath:\n            path:.*"
+        replace: "        - name: opensm-cache\n          hostPath:\n            path: {{ subnet_manager.cache_directory }}"
+      tags: install
+
+    - name: Replace log directory in sm config file
+      replace:
+        path: "{{ sm_kube_config_file }}"
+        regexp: "        - name: opensm-logs\n          hostPath:\n            path:.*"
+        replace: "        - name: opensm-logs\n          hostPath:\n            path: {{ subnet_manager.log_directory }}"
+      tags: install
+
+    - name: Create subnet manager pod
+      command: "kubectl apply -f {{ sm_kube_config_file }}"
+      tags: install
+
+  when: "'subnet-manager' not in k8s_pods.stdout"

+ 25 - 13
control_plane/roles/control_plane_sm/tasks/main.yml

@@ -1,19 +1,31 @@
 # Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
 #
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
-#      http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 ---
+# tasks file for control_plane_sm
 
-# Will be updated later in each PR
-- name: Pass
-  debug:
-    msg: "Pass"
+# Everything in this block runs only when ib_switch_support evaluates true.
+- name: Check if IB switch is supported
+  when: ib_switch_support
+  block:
+    - include_tasks: pre_requisites.yml
+      name: Check pre-requisites
+      tags: install
+
+    - include_tasks: create_image.yml
+      name: Create image
+      tags: install
+
+    - include_tasks: create_pod.yml
+      name: Check pod
+      tags: install

+ 49 - 0
control_plane/roles/control_plane_sm/tasks/pre_requisites.yml

@@ -0,0 +1,49 @@
+# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Pre-requisites for the subnet manager role
+
+# Look up both configured directories first, so that only the missing ones
+# are created and existing directories keep their current owner and mode.
+- name: Checking directory
+  stat:
+    path: "{{ item }}"
+  loop:
+    - "{{ subnet_manager.cache_directory }}"
+    - "{{ subnet_manager.log_directory }}"
+  loop_control:
+    label: "{{ item }}"
+  register: stat_result
+  tags: install
+
+- name: Creating directories
+  file:
+    state: directory
+    path: "{{ item.item }}"
+    owner: root
+    group: root
+    mode: "{{ folder_perm }}"
+  loop: "{{ stat_result.results }}"
+  loop_control:
+    label: "{{ item.item }}"
+  when: not item.stat.exists
+  tags: install
+
+# Always overwrite the destination with the current input file.
+- name: Copy opensm configuration file
+  copy:
+    dest: "{{ opensm_conf_file_dest }}"
+    src: "{{ opensm_conf_file }}"
+    force: true
+  tags: install

+ 28 - 0
control_plane/roles/control_plane_sm/vars/main.yml

@@ -0,0 +1,28 @@
+# Copyright 2021 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Usage: pre_requisites.yml
+# Mode applied to directories created by the role. Kept as a quoted octal
+# string so it is never read as a decimal integer, and set to 0755 because
+# directories need the execute (search) bit to be traversable.
+folder_perm: "0755"
+# Source opensm.conf under input_params and its copy inside the role's files
+# directory.
+opensm_conf_file: "{{ role_path }}/../../input_params/opensm.conf"
+opensm_conf_file_dest: "{{ role_path }}/files/opensm.conf"
+
+# Usage: create_image.yml
+sm_docker_image_name: opensm-service
+sm_docker_image_tag: latest
+
+# Usage: create_pod.yml
+sm_container_name: opensm-container
+sm_kube_config_file: "{{ role_path }}/files/k8s_sm.yml"