{
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"variables": {
"isScaleSetPrioritySpot": "[equals(parameters('scaleSetPriority'), 'Spot')]",
"defaultAadProfile": {
"managed": true,
"adminGroupObjectIDs": "[parameters('adminGroupObjectIDs')]",
"enableAzureRBAC": "[parameters('azureRbac')]"
},
"defaultApiServerAccessProfile": {
"authorizedIPRanges": "[if(parameters('enableAuthorizedIpRange'), parameters('authorizedIPRanges'), null())]",
"enablePrivateCluster": "[parameters('enablePrivateCluster')]"
},
"defaultAzurePolicy": {
"enabled": "[parameters('enableAzurePolicy')]"
},
"defaultSecrectStoreProvider": {
"enabled": "[parameters('enableSecretStoreCSIDriver')]",
"config": "[if(parameters('enableSecretStoreCSIDriver'), variables('secrectStoreConfig'), null())]"
},
"secrectStoreConfig": {
"enableSecretRotation": "false",
"rotationPollInterval": "2m"
},
"servicePrincipalProfile": {
"ClientId": "[parameters('servicePrincipalClientId')]",
"Secret": "[parameters('servicePrincipalClientSecret')]"
}
},
"parameters": {
"apiVersion": {
"type": "string"
},
"resourceName": {
"type": "string",
"metadata": {
"description": "The name of the Managed Cluster resource."
}
},
"location": {
"type": "string",
"metadata": {
"description": "The location of AKS resource."
}
},
"isLocationEdgeZone": {
"defaultValue": false,
"type": "bool"
},
"edgeZone": {
"defaultValue": {},
"type": "object",
"metadata": {
"description": "Extended location of the cluster."
}
},
"useServicePrincipal": {
"defaultValue": false,
"type": "bool"
},
"clusterSku": {
"defaultValue": {
"name": "Base",
"tier": "Standard"
},
"type": "object",
"metadata": {
"description": "The managed cluster SKU tier."
}
},
"clusterTags": {
"defaultValue": {},
"type": "object",
"metadata": {
"description": "Specifies the tags of the AKS cluster."
}
},
"tagsForAllResources": {
"defaultValue": {},
"type": "object"
},
"clusterIdentity": {
"defaultValue": {
"type": "SystemAssigned"
},
"type": "object",
"metadata": {
"description": "The identity of the managed cluster, if configured."
}
},
"enableAadProfile": {
"defaultValue": false,
"type": "bool",
"metadata": {
"description": "Flag to turn the Microsoft Entra ID profile on or off."
}
},
"aadProfile": {
"defaultValue": {},
"type": "object",
"metadata": {
"description": "The Microsoft Entra ID configuration."
}
},
"dnsPrefix": {
"type": "string",
"metadata": {
"description": "Optional DNS prefix to use with hosted Kubernetes API server FQDN."
}
},
"kubernetesVersion": {
"type": "string",
"defaultValue": "1.7.7",
"metadata": {
"description": "The version of Kubernetes."
}
},
"enableRBAC": {
"type": "bool",
"defaultValue": true,
"metadata": {
"description": "Boolean flag to turn RBAC on or off."
}
},
"windowsProfile": {
"type": "bool",
"defaultValue": false,
"metadata": {
"description": "Boolean flag to turn the Windows profile on or off."
}
},
"nodeResourceGroup": {
"type": "string",
"metadata": {
"description": "The name of the resource group containing agent pool nodes."
}
},
"upgradeChannel": {
"defaultValue": "none",
"type": "string",
"allowedValues": [
"none",
"patch",
"rapid",
"stable",
"node-image"
],
"metadata": {
"description": "Auto upgrade channel for a managed cluster."
}
},
"servicePrincipalClientId": {
"defaultValue": "",
"metadata": {
"description": "Client ID (used by cloudprovider)."
},
"type": "securestring"
},
"servicePrincipalClientSecret": {
"defaultValue": "",
"metadata": {
"description": "The Service Principal Client Secret."
},
"type": "securestring"
},
"adminGroupObjectIDs": {
"type": "array",
"defaultValue": [],
"metadata": {
"description": "An array of Microsoft Entra group object ids to give administrative access."
}
},
"principalId": {
"defaultValue": "",
"type": "string",
"metadata": {
"description": "The objectId of service principal."
}
},
"supportPlan": {
"type": "string",
"defaultValue": "KubernetesOfficial",
"allowedValues": [
"AKSLongTermSupport",
"KubernetesOfficial"
]
},
"azureRbac": {
"type": "bool",
"defaultValue": false,
"metadata": {
"description": "Enable or disable Azure RBAC."
}
},
"disableLocalAccounts": {
"type": "bool",
"defaultValue": false,
"metadata": {
"description": "Enable or disable local accounts."
}
},
"enablePrivateCluster": {
"type": "bool",
"defaultValue": false,
"metadata": {
"description": "Enable private network access to the Kubernetes cluster."
}
},
"isPrivateClusterSupported": {
"type": "bool",
"defaultValue": false
},
"enableAuthorizedIpRange": {
"type": "bool",
"defaultValue": false
},
"authorizedIPRanges": {
"defaultValue": [],
"type": "array",
"metadata": {
"description": "The IP ranges authorized to access the Kubernetes API server. Applied only when enableAuthorizedIpRange is true."
}
},
"isPublicNetworkAccessEnabled": {
"type": "bool",
"defaultValue": false
},
"publicNetworkAccess": {
"defaultValue": "Enabled",
"type": "string",
"allowedValues": [
"Disabled",
"Enabled",
"SecuredByPerimeter"
],
"metadata": {
"description": "Allow or deny public network access for AKS."
}
},
"enableDiskEncryptionSetID": {
"defaultValue": false,
"type": "bool",
"metadata": {
"description": "Flag to turn diskEncryptionSetID on or off. When false, diskEncryptionSetID is set to null."
}
},
"diskEncryptionSetID": {
"defaultValue": "",
"type": "string",
"metadata": {
"description": "The ID of the disk encryption set used to encrypt the OS disks of the nodes."
}
},
"aadSessionKey": {
"type": "securestring",
"defaultValue": ""
},
"nodeAutoProvisioningMode": {
"type": "string",
"defaultValue": "Manual",
"allowedValues": [
"Auto",
"Manual"
],
"metadata": {
"description": "The node auto-provisioning mode: Auto or Manual."
}
},
"isAzurePolicySupported": {
"type": "bool",
"defaultValue": false
},
"enableAzurePolicy": {
"type": "bool",
"defaultValue": false,
"metadata": {
"description": "Boolean flag to turn on and off Azure Policy addon."
}
},
"isSecretStoreCSIDDriverSupported": {
"type": "bool",
"defaultValue": false
},
"enableSecretStoreCSIDriver": {
"type": "bool",
"defaultValue": false,
"metadata": {
"description": "Boolean flag to turn on and off secret store CSI driver."
}
},
"enableOmsAgent": {
"type": "bool",
"defaultValue": true,
"metadata": {
"description": "Boolean flag to turn on and off omsagent addon."
}
},
"workspaceRegion": {
"type": "string",
"defaultValue": "East US",
"metadata": {
"description": "Specify the region for your OMS workspace."
}
},
"workspaceName": {
"defaultValue": "",
"type": "string",
"metadata": {
"description": "Specify the name of the OMS workspace."
}
},
"omsWorkspaceId": {
"defaultValue": "",
"type": "string",
"metadata": {
"description": "Specify the resource id of the OMS workspace."
}
},
"omsSku": {
"type": "string",
"defaultValue": "standalone",
"allowedValues": [
"free",
"standalone",
"pernode"
],
"metadata": {
"description": "Select the SKU for your workspace."
}
},
"aciVnetSubnetName": {
"defaultValue": "",
"type": "string",
"metadata": {
"description": "Name of virtual network subnet used for the ACI Connector."
}
},
"aciConnectorLinuxEnabled": {
"defaultValue": false,
"type": "bool",
"metadata": {
"description": "Enables the Linux ACI Connector."
}
},
"acrName": {
"defaultValue": "",
"type": "string",
"metadata": {
"description": "Specify the name of the Azure Container Registry."
}
},
"acrResourceGroup": {
"defaultValue": "",
"type": "string",
"metadata": {
"description": "The name of the resource group the container registry is associated with."
}
},
"guidValue": {
"type": "string",
"metadata": {
"description": "The unique id used in the role assignment of the kubernetes service to the container registry service. It is recommended to use the default value."
},
"defaultValue": "[newGuid()]"
},
"enableVnetSubnetID": {
"defaultValue": false,
"type": "bool",
"metadata": {
"description": "Flag to turn on or off of vnetSubnetID."
}
},
"vnetSubnetID": {
"defaultValue": "",
"type": "string",
"metadata": {
"description": "Resource ID of virtual network subnet used for nodes and/or pods IP assignment."
}
},
"loadBalancerSku": {
"defaultValue": "Standard",
"type": "string",
"allowedValues": [
"Basic",
"Standard"
],
"metadata": {
"description": "Specifies the sku of the load balancer used by the virtual machine scale sets used by node pools."
}
},
"networkPolicy": {
"defaultValue": "",
"type": "string",
"metadata": {
"description": "Network policy used for building the Kubernetes network."
}
},
"networkPlugin": {
"defaultValue": "azure",
"type": "string",
"allowedValues": [
"azure",
"kubenet"
],
"metadata": {
"description": "Network plugin used for building the Kubernetes network."
}
},
"networkPluginMode": {
"defaultValue": "",
"type": "string",
"metadata": {
"description": "Network plugin mode used for building the Kubernetes network."
}
},
"networkDataplane": {
"defaultValue": "",
"type": "string",
"metadata": {
"description": "Network dataplane used in the Kubernetes cluster."
}
},
"serviceCidr": {
"defaultValue": "",
"type": "string",
"metadata": {
"description": "A CIDR notation IP range from which to assign service cluster IPs."
}
},
"dnsServiceIP": {
"defaultValue": "",
"type": "string",
"metadata": {
"description": "Containers DNS server IP address."
}
},
"enableContainerNetworkObservability": {
"type": "bool",
"defaultValue": false,
"metadata": {
"description": "Boolean flag to turn on and off Container Network Observability."
}
},
"enableContainerNetworkSecurity": {
"type": "bool",
"defaultValue": false,
"metadata": {
"description": "Boolean flag to turn on and off Container Network Security."
}
},
"spotMaxPrice": {
"defaultValue": "",
"type": "string",
"metadata": {
"description": "Possible values are any decimal value greater than zero or -1 which indicates the willingness to pay any on-demand price."
}
},
"vmssNodePool": {
"type": "bool",
"defaultValue": false,
"metadata": {
"description": "Boolean flag to turn on and off of virtual machine scale sets"
}
},
"isAvailabilityZoneEnabled": {
"type": "bool",
"defaultValue": false,
"metadata": {
"description": "Boolean flag to turn on or off of Availability Zone"
}
},
"osDiskSizeGB": {
"type": "int",
"defaultValue": 0,
"metadata": {
"description": "Disk size (in GiB) to provision for each of the agent pool nodes. This value ranges from 0 to 1023. Specifying 0 will apply the default disk size for that agentVMSize."
},
"minValue": 0,
"maxValue": 1023
},
"agentCount": {
"defaultValue": 3,
"minValue": 1,
"maxValue": 50,
"type": "int",
"metadata": {
"description": "The number of agent nodes for the cluster. Production workloads have a recommended minimum of 3."
}
},
"scaleSetEvictionPolicy": {
"defaultValue": "Delete",
"allowedValues": [
"Delete",
"Deallocate"
],
"type": "string",
"metadata": {
"description": "Specifies the ScaleSetEvictionPolicy to be used to specify eviction policy for spot virtual machine scale set. Default to Delete. Allowed values are Delete or Deallocate."
}
},
"scaleSetPriority": {
"defaultValue": "Regular",
"allowedValues": [
"Spot",
"Regular"
],
"type": "string",
"metadata": {
"description": "Specifies the virtual machine scale set priority in the user node pool: Spot or Regular."
}
},
"agentTags": {
"defaultValue": {},
"type": "object",
"metadata": {
"description": "Specifies the tags of the agent pool."
}
},
"enableNodePublicIP": {
"defaultValue": false,
"type": "bool",
"metadata": {
"description": "Some scenarios may require nodes in a node pool to receive their own dedicated public IP addresses."
}
},
"agentNodeTaints": {
"defaultValue": [],
"type": "array",
"metadata": {
"description": "Specifies the taints added to new nodes during node pool create and scale. For example, key=value:NoSchedule."
}
},
"agentNodeLables": {
"defaultValue": {},
"type": "object",
"metadata": {
"description": "Specifies the Agent pool node labels to be persisted across all nodes in the system node pool."
}
},
"agentAvailabilityZones": {
"defaultValue": [],
"type": "array",
"metadata": {
"description": "Specifies the availability zones for the agent nodes in the agent node pool. Requires the use of VirtualMachineScaleSets as node pool type."
}
},
"agentMode": {
"defaultValue": "System",
"type": "string",
"allowedValues": [
"System",
"User"
],
"metadata": {
"description": "A cluster must have at least one 'System' Agent Pool at all times."
}
},
"agentMaxPods": {
"defaultValue": 30,
"type": "int",
"metadata": {
"description": "Specifies the maximum number of pods that can run on a node in the agent node pool. The maximum number of pods per node in an AKS cluster is 250. The default maximum number of pods per node varies between kubenet and Azure CNI networking, and the method of cluster deployment."
}
},
"agentPoolType": {
"defaultValue": "VirtualMachineScaleSets",
"allowedValues": [
"VirtualMachineScaleSets",
"VirtualMachines",
"AvailabilitySet"
],
"type": "string",
"metadata": {
"description": "The type of agent pool."
}
},
"agentOSType": {
"defaultValue": "Linux",
"allowedValues": [
"Linux",
"Windows"
],
"type": "string",
"metadata": {
"description": "The type of operating system for agent pool."
}
},
"agentVMSize": {
"defaultValue": "Standard_D2_v3",
"type": "string",
"metadata": {
"description": "The size of the Virtual Machine."
}
},
"agentMaxCount": {
"defaultValue": 5,
"type": "int",
"metadata": {
"description": "Specifies the maximum number of nodes for auto-scaling for the system node pool."
}
},
"agentMinCount": {
"defaultValue": 3,
"type": "int",
"metadata": {
"description": "Specifies the minimum number of nodes for auto-scaling for the system node pool."
}
},
"enableAutoScaling": {
"defaultValue": true,
"type": "bool",
"metadata": {
"description": "Specifies whether to enable auto-scaling for the system node pool."
}
},
"serviceMeshMode": {
"type": "string",
"allowedValues": [
"Disabled",
"Istio"
],
"defaultValue": "Disabled"
},
"istioInternalIngressGateway": {
"type": "bool",
"defaultValue": false
},
"istioExternalIngressGateway": {
"type": "bool",
"defaultValue": false
},
"nodeOSUpgradeChannel": {
"defaultValue": "NodeImage",
"type": "string",
"allowedValues": [
"None",
"Unmanaged",
"SecurityPatch",
"NodeImage"
],
"metadata": {
"description": "Auto upgrade channel for node OS security."
}
},
"isImageCleanerEnabled": {
"type": "bool",
"defaultValue": false
},
"imageCleanerIntervalHours": {
"type": "int",
"defaultValue": 168
},
"enableOIDC": {
"type": "bool",
"defaultValue": true,
"metadata": {
"description": "Whether the OIDC issuer is enabled."
}
},
"issuerURL": {
"type": "string",
"defaultValue": "",
"metadata": {
"description": "The OIDC issuer url of the Managed Cluster."
}
},
"enableWorkloadIdentity": {
"type": "bool",
"defaultValue": true,
"metadata": {
"description": "Whether to enable workload identity."
}
},
"fleetName": {
"type": "string",
"defaultValue": ""
},
"fleetLocation": {
"type": "string",
"defaultValue": ""
},
"fleetDnsNamePrefix": {
"type": "string",
"defaultValue": ""
}
},
"resources": [
{
"type": "Microsoft.ContainerService/managedClusters/maintenanceConfigurations",
"name": "aks-cluster-1/aksManagedAutoUpgradeSchedule",
"apiVersion": "2025-05-01",
"dependsOn": [
"[concat('Microsoft.ContainerService/managedClusters/', parameters('resourceName'))]"
],
"properties": {
"maintenanceWindow": {
"schedule": {
"daily": null,
"weekly": {
"intervalWeeks": 1,
"dayOfWeek": "Sunday"
},
"absoluteMonthly": null,
"relativeMonthly": null
},
"durationHours": 8,
"utcOffset": "+00:00",
"startDate": "2026-05-07",
"startTime": "00:00"
}
}
},
{
"type": "Microsoft.ContainerService/managedClusters/maintenanceConfigurations",
"name": "aks-cluster-1/aksManagedNodeOSUpgradeSchedule",
"apiVersion": "2025-05-01",
"dependsOn": [
"[concat('Microsoft.ContainerService/managedClusters/', parameters('resourceName'))]"
],
"properties": {
"maintenanceWindow": {
"schedule": {
"weekly": {
"intervalWeeks": 1,
"dayOfWeek": "Sunday"
}
},
"durationHours": 8,
"utcOffset": "+00:00",
"startDate": "2026-05-07",
"startTime": "00:00"
}
}
},
{
"type": "Microsoft.ContainerService/managedClusters",
"apiVersion": "[parameters('apiVersion')]",
"sku": "[parameters('clusterSku')]",
"identity": "[parameters('clusterIdentity')]",
"dependsOn": [
"Microsoft.Resources/deployments/CreateUserAssignedIdentity-20260507163212-78",
"Microsoft.Resources/deployments/AddNetworkContributorRoleAssignmentToVirtualNe-20260507163212-44",
"VnetDeployment-c3ce6448-cb07-f66c-a8ec-b7b242b56e17"
],
"location": "[parameters('location')]",
"name": "[parameters('resourceName')]",
"extendedLocation": "[if(parameters('isLocationEdgeZone'), parameters('edgeZone'), null())]",
"properties": {
"kubernetesVersion": "[parameters('kubernetesVersion')]",
"enableRBAC": "[parameters('enableRBAC')]",
"dnsPrefix": "[parameters('dnsPrefix')]",
"nodeResourceGroup": "[parameters('nodeResourceGroup')]",
"disableLocalAccounts": "[parameters('disableLocalAccounts')]",
"aadProfile": "[if(parameters('enableAadProfile'), variables('defaultAadProfile'), null())]",
"autoUpgradeProfile": {
"upgradeChannel": "[parameters('upgradeChannel')]",
"nodeOSUpgradeChannel": "[parameters('nodeOSUpgradeChannel')]"
},
"agentPoolProfiles": [
{
"name": "agentpool",
"osDiskSizeGB": "[parameters('osDiskSizeGB')]",
"count": 2,
"enableAutoScaling": true,
"minCount": 2,
"maxCount": 5,
"vmSize": "Standard_D2als_v7",
"osType": "Linux",
"osSKU": "Ubuntu",
"type": "VirtualMachineScaleSets",
"mode": "System",
"maxPods": 110,
"availabilityZones": [
"1",
"2",
"3"
],
"nodeLabels": {},
"nodeTaints": [],
"enableNodePublicIP": false,
"tags": {
"Managed By": "Manually by Solomon"
},
"vnetSubnetID": "[parameters('vnetSubnetID')]"
}
],
"apiServerAccessProfile": "[if(parameters('isPrivateClusterSupported'), variables('defaultApiServerAccessProfile'), null())]",
"addonProfiles": {
"azurepolicy": "[if(parameters('isAzurePolicySupported'), variables('defaultAzurePolicy'), null())]",
"azureKeyvaultSecretsProvider": "[if(parameters('isSecretStoreCSIDDriverSupported'), variables('defaultSecrectStoreProvider'), null())]"
},
"diskEncryptionSetID": "[if(parameters('enableDiskEncryptionSetID'), parameters('diskEncryptionSetID'), null())]",
"networkProfile": {
"loadBalancerSku": "[parameters('loadBalancerSku')]",
"networkPlugin": "[parameters('networkPlugin')]",
"networkPluginMode": "[parameters('networkPluginMode')]",
"networkDataplane": "[parameters('networkDataplane')]",
"networkPolicy": "[parameters('networkPolicy')]",
"serviceCidr": "[parameters('serviceCidr')]",
"dnsServiceIP": "[parameters('dnsServiceIP')]",
"advancedNetworking": {
"enabled": "[or(parameters('enableContainerNetworkObservability'), parameters('enableContainerNetworkSecurity'))]",
"observability": {
"enabled": "[parameters('enableContainerNetworkObservability')]"
},
"security": {
"enabled": "[parameters('enableContainerNetworkSecurity')]"
}
}
},
"supportPlan": "[parameters('supportPlan')]",
"securityProfile": {
"imageCleaner": {
"enabled": "[parameters('isImageCleanerEnabled')]",
"intervalHours": "[parameters('imageCleanerIntervalHours')]"
},
"workloadIdentity": {
"enabled": "[parameters('enableWorkloadIdentity')]"
}
},
"oidcIssuerProfile": {
"enabled": "[parameters('enableOIDC')]"
},
"nodeProvisioningProfile": {
"mode": "[parameters('nodeAutoProvisioningMode')]"
}
},
"tags": "[parameters('clusterTags')]"
},
{
"name": "CreatePromDCE-20260507163212-24",
"type": "Microsoft.Resources/deployments",
"apiVersion": "2019-05-01",
"properties": {
"mode": "Incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"resources": [
{
"name": "MSProm-centralus-aks-cluster-1",
"type": "Microsoft.Insights/dataCollectionEndpoints",
"location": "centralus",
"kind": "Linux",
"apiVersion": "2022-06-01",
"properties": {},
"tags": "[parameters('tagsForAllResources')]"
}
]
}
},
"subscriptionId": "c97afaf5-14ae-40aa-ad2b-3f39120dc8ba",
"resourceGroup": "AKS_Course_RG",
"dependsOn": [
"[concat('Microsoft.ContainerService/managedClusters/', parameters('resourceName'))]"
]
},
{
"type": "Microsoft.Resources/deployments",
"apiVersion": "2019-05-01",
"name": "CreatePromDCR-20260507163212-61",
"properties": {
"mode": "Incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"resources": [
{
"type": "Microsoft.Insights/dataCollectionRules",
"location": "centralus",
"name": "MSProm-centralus-aks-cluster-1",
"apiVersion": "2022-06-01",
"kind": "Linux",
"properties": {
"dataCollectionEndpointId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/Microsoft.Insights/dataCollectionEndpoints/MSProm-centralus-aks-cluster-1",
"dataSources": {
"prometheusForwarder": [
{
"name": "PrometheusDataSource",
"streams": [
"Microsoft-PrometheusMetrics"
],
"labelIncludeFilter": {}
}
]
},
"destinations": {
"monitoringAccounts": [
{
"accountResourceId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/Microsoft.monitor/accounts/defaultazuremonitorworkspace-cus",
"name": "MonitoringAccount1"
}
]
},
"dataFlows": [
{
"destinations": [
"MonitoringAccount1"
],
"streams": [
"Microsoft-PrometheusMetrics"
]
}
]
},
"tags": "[parameters('tagsForAllResources')]"
}
]
}
},
"dependsOn": [
"Microsoft.Resources/deployments/CreatePromDCE-20260507163212-24",
"Microsoft.Resources/deployments/CreateAzureMonitorWorkspace-20260507163212-6"
],
"subscriptionId": "c97afaf5-14ae-40aa-ad2b-3f39120dc8ba",
"resourceGroup": "AKS_Course_RG"
},
{
"type": "Microsoft.Resources/deployments",
"apiVersion": "2019-05-01",
"name": "CreatePromDCRA-20260507163212-8",
"dependsOn": [
"Microsoft.Resources/deployments/CreatePromDCR-20260507163212-61"
],
"properties": {
"mode": "Incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"resources": [
{
"type": "Microsoft.Insights/dataCollectionRuleAssociations",
"name": "ContainerInsightsMetricsExtension",
"apiVersion": "2022-06-01",
"properties": {
"description": "Association of data collection rule. Deleting this association will break the prometheus metrics data collection for this AKS Cluster.",
"dataCollectionRuleId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/Microsoft.Insights/dataCollectionRules/MSProm-centralus-aks-cluster-1"
},
"id": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/aks_course_rg/providers/microsoft.containerservice/managedclusters/aks-cluster-1/providers/Microsoft.Insights/dataCollectionRuleAssociations/ContainerInsightsMetricsExtension",
"scope": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/aks_course_rg/providers/microsoft.containerservice/managedclusters/aks-cluster-1"
}
]
}
},
"subscriptionId": "c97afaf5-14ae-40aa-ad2b-3f39120dc8ba",
"resourceGroup": "AKS_Course_RG"
},
{
"type": "Microsoft.Resources/deployments",
"apiVersion": "2019-05-01",
"name": "CreatePromRecordingRules-20260507163212-32",
"subscriptionId": "c97afaf5-14ae-40aa-ad2b-3f39120dc8ba",
"resourceGroup": "AKS_Course_RG",
"properties": {
"mode": "Incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"resources": [
{
"location": "centralus",
"name": "NodeRecordingRulesRuleGroup-aks-cluster-1",
"type": "Microsoft.AlertsManagement/prometheusRuleGroups",
"properties": {
"description": "Node Recording Rules RuleGroup",
"enabled": true,
"scopes": [
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/Microsoft.monitor/accounts/defaultazuremonitorworkspace-cus",
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/Microsoft.ContainerService/managedClusters/aks-cluster-1"
],
"interval": "PT1M",
"clusterName": "aks-cluster-1",
"rules": [
{
"record": "instance:node_num_cpu:sum",
"expression": "count without (cpu, mode) ( node_cpu_seconds_total{job=\"node\",mode=\"idle\"})"
},
{
"record": "instance:node_cpu_utilisation:rate5m",
"expression": "1 - avg without (cpu) ( sum without (mode) (rate(node_cpu_seconds_total{job=\"node\", mode=~\"idle|iowait|steal\"}[5m])))"
},
{
"record": "instance:node_load1_per_cpu:ratio",
"expression": "( node_load1{job=\"node\"}/ instance:node_num_cpu:sum{job=\"node\"})"
},
{
"record": "instance:node_memory_utilisation:ratio",
"expression": "1 - ( ( node_memory_MemAvailable_bytes{job=\"node\"} or ( node_memory_Buffers_bytes{job=\"node\"} + node_memory_Cached_bytes{job=\"node\"} + node_memory_MemFree_bytes{job=\"node\"} + node_memory_Slab_bytes{job=\"node\"} ) )/ node_memory_MemTotal_bytes{job=\"node\"})"
},
{
"record": "instance:node_vmstat_pgmajfault:rate5m",
"expression": "rate(node_vmstat_pgmajfault{job=\"node\"}[5m])"
},
{
"record": "instance_device:node_disk_io_time_seconds:rate5m",
"expression": "rate(node_disk_io_time_seconds_total{job=\"node\", device!=\"\"}[5m])"
},
{
"record": "instance_device:node_disk_io_time_weighted_seconds:rate5m",
"expression": "rate(node_disk_io_time_weighted_seconds_total{job=\"node\", device!=\"\"}[5m])"
},
{
"record": "instance:node_network_receive_bytes_excluding_lo:rate5m",
"expression": "sum without (device) ( rate(node_network_receive_bytes_total{job=\"node\", device!=\"lo\"}[5m]))"
},
{
"record": "instance:node_network_transmit_bytes_excluding_lo:rate5m",
"expression": "sum without (device) ( rate(node_network_transmit_bytes_total{job=\"node\", device!=\"lo\"}[5m]))"
},
{
"record": "instance:node_network_receive_drop_excluding_lo:rate5m",
"expression": "sum without (device) ( rate(node_network_receive_drop_total{job=\"node\", device!=\"lo\"}[5m]))"
},
{
"record": "instance:node_network_transmit_drop_excluding_lo:rate5m",
"expression": "sum without (device) ( rate(node_network_transmit_drop_total{job=\"node\", device!=\"lo\"}[5m]))"
}
]
},
"apiVersion": "2023-03-01",
"tags": "[parameters('tagsForAllResources')]"
},
{
"location": "centralus",
"name": "KubernetesRecordingRulesRuleGroup-aks-cluster-1",
"type": "Microsoft.AlertsManagement/prometheusRuleGroups",
"properties": {
"description": "Kubernetes Recording Rules RuleGroup",
"enabled": true,
"scopes": [
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/Microsoft.monitor/accounts/defaultazuremonitorworkspace-cus",
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/Microsoft.ContainerService/managedClusters/aks-cluster-1"
],
"interval": "PT1M",
"clusterName": "aks-cluster-1",
"rules": [
{
"record": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate",
"expression": "sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))"
},
{
"record": "node_namespace_pod_container:container_memory_working_set_bytes",
"expression": "container_memory_working_set_bytes{job=\"cadvisor\", image!=\"\"}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=\"\"}))"
},
{
"record": "node_namespace_pod_container:container_memory_rss",
"expression": "container_memory_rss{job=\"cadvisor\", image!=\"\"}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=\"\"}))"
},
{
"record": "node_namespace_pod_container:container_memory_cache",
"expression": "container_memory_cache{job=\"cadvisor\", image!=\"\"}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=\"\"}))"
},
{
"record": "node_namespace_pod_container:container_memory_swap",
"expression": "container_memory_swap{job=\"cadvisor\", image!=\"\"}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=\"\"}))"
},
{
"record": "cluster:namespace:pod_memory:active:kube_pod_container_resource_requests",
"expression": "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))"
},
{
"record": "namespace_memory:kube_pod_container_resource_requests:sum",
"expression": "sum by (namespace, cluster) ( sum by (namespace, pod, cluster) ( max by (namespace, pod, container, cluster) ( kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( kube_pod_status_phase{phase=~\"Pending|Running\"} == 1 ) ))"
},
{
"record": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests",
"expression": "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))"
},
{
"record": "namespace_cpu:kube_pod_container_resource_requests:sum",
"expression": "sum by (namespace, cluster) ( sum by (namespace, pod, cluster) ( max by (namespace, pod, container, cluster) ( kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( kube_pod_status_phase{phase=~\"Pending|Running\"} == 1 ) ))"
},
{
"record": "cluster:namespace:pod_memory:active:kube_pod_container_resource_limits",
"expression": "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))"
},
{
"record": "namespace_memory:kube_pod_container_resource_limits:sum",
"expression": "sum by (namespace, cluster) ( sum by (namespace, pod, cluster) ( max by (namespace, pod, container, cluster) ( kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( kube_pod_status_phase{phase=~\"Pending|Running\"} == 1 ) ))"
},
{
"record": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits",
"expression": "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1) )"
},
{
"record": "namespace_cpu:kube_pod_container_resource_limits:sum",
"expression": "sum by (namespace, cluster) ( sum by (namespace, pod, cluster) ( max by (namespace, pod, container, cluster) ( kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( kube_pod_status_phase{phase=~\"Pending|Running\"} == 1 ) ))"
},
{
"record": "namespace_workload_pod:kube_pod_owner:relabel",
"expression": "max by (cluster, namespace, workload, pod) ( label_replace( label_replace( kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"ReplicaSet\"}, \"replicaset\", \"$1\", \"owner_name\", \"(.*)\" ) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) ( 1, max by (replicaset, namespace, owner_name) ( kube_replicaset_owner{job=\"kube-state-metrics\"} ) ), \"workload\", \"$1\", \"owner_name\", \"(.*)\" ))",
"labels": {
"workload_type": "deployment"
}
},
{
"record": "namespace_workload_pod:kube_pod_owner:relabel",
"expression": "max by (cluster, namespace, workload, pod) ( label_replace( kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"DaemonSet\"}, \"workload\", \"$1\", \"owner_name\", \"(.*)\" ))",
"labels": {
"workload_type": "daemonset"
}
},
{
"record": "namespace_workload_pod:kube_pod_owner:relabel",
"expression": "max by (cluster, namespace, workload, pod) ( label_replace( kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"StatefulSet\"}, \"workload\", \"$1\", \"owner_name\", \"(.*)\" ))",
"labels": {
"workload_type": "statefulset"
}
},
{
"record": "namespace_workload_pod:kube_pod_owner:relabel",
"expression": "max by (cluster, namespace, workload, pod) ( label_replace( kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"Job\"}, \"workload\", \"$1\", \"owner_name\", \"(.*)\" ))",
"labels": {
"workload_type": "job"
}
},
{
"record": ":node_memory_MemAvailable_bytes:sum",
"expression": "sum( node_memory_MemAvailable_bytes{job=\"node\"} or ( node_memory_Buffers_bytes{job=\"node\"} + node_memory_Cached_bytes{job=\"node\"} + node_memory_MemFree_bytes{job=\"node\"} + node_memory_Slab_bytes{job=\"node\"} )) by (cluster)"
},
{
"record": "cluster:node_cpu:ratio_rate5m",
"expression": "sum(rate(node_cpu_seconds_total{job=\"node\",mode!=\"idle\",mode!=\"iowait\",mode!=\"steal\"}[5m])) by (cluster) /count(sum(node_cpu_seconds_total{job=\"node\"}) by (cluster, instance, cpu)) by (cluster)"
}
]
},
"apiVersion": "2023-03-01",
"tags": "[parameters('tagsForAllResources')]"
},
{
"location": "centralus",
"name": "UXRecordingRulesRuleGroup - aks-cluster-1",
"type": "Microsoft.AlertsManagement/prometheusRuleGroups",
"properties": {
"description": "UX Recording Rules for Linux",
"enabled": true,
"scopes": [
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/Microsoft.monitor/accounts/defaultazuremonitorworkspace-cus",
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/Microsoft.ContainerService/managedClusters/aks-cluster-1"
],
"interval": "PT1M",
"clusterName": "aks-cluster-1",
"rules": [
{
"record": "ux:pod_cpu_usage:sum_irate",
"expression": "(sum by (namespace, pod, cluster, microsoft_resourceid) (\n\tirate(container_cpu_usage_seconds_total{container != \"\", pod != \"\", job = \"cadvisor\"}[5m])\n)) * on (pod, namespace, cluster, microsoft_resourceid) group_left (node, created_by_name, created_by_kind)\n(max by (node, created_by_name, created_by_kind, pod, namespace, cluster, microsoft_resourceid) (kube_pod_info{pod != \"\", job = \"kube-state-metrics\"}))"
},
{
"record": "ux:controller_cpu_usage:sum_irate",
"expression": "sum by (namespace, node, cluster, created_by_name, created_by_kind, microsoft_resourceid) (\nux:pod_cpu_usage:sum_irate\n)\n"
},
{
"record": "ux:pod_workingset_memory:sum",
"expression": "(\n\t sum by (namespace, pod, cluster, microsoft_resourceid) (\n\t\tcontainer_memory_working_set_bytes{container != \"\", pod != \"\", job = \"cadvisor\"}\n\t )\n\t) * on (pod, namespace, cluster, microsoft_resourceid) group_left (node, created_by_name, created_by_kind)\n(max by (node, created_by_name, created_by_kind, pod, namespace, cluster, microsoft_resourceid) (kube_pod_info{pod != \"\", job = \"kube-state-metrics\"}))"
},
{
"record": "ux:controller_workingset_memory:sum",
"expression": "sum by (namespace, node, cluster, created_by_name, created_by_kind, microsoft_resourceid) (\nux:pod_workingset_memory:sum\n)"
},
{
"record": "ux:pod_rss_memory:sum",
"expression": "(\n\t sum by (namespace, pod, cluster, microsoft_resourceid) (\n\t\tcontainer_memory_rss{container != \"\", pod != \"\", job = \"cadvisor\"}\n\t )\n\t) * on (pod, namespace, cluster, microsoft_resourceid) group_left (node, created_by_name, created_by_kind)\n(max by (node, created_by_name, created_by_kind, pod, namespace, cluster, microsoft_resourceid) (kube_pod_info{pod != \"\", job = \"kube-state-metrics\"}))"
},
{
"record": "ux:controller_rss_memory:sum",
"expression": "sum by (namespace, node, cluster, created_by_name, created_by_kind, microsoft_resourceid) (\nux:pod_rss_memory:sum\n)"
},
{
"record": "ux:pod_container_count:sum",
"expression": "sum by (node, created_by_name, created_by_kind, namespace, cluster, pod, microsoft_resourceid) (\n(\n(\nsum by (container, pod, namespace, cluster, microsoft_resourceid) (kube_pod_container_info{container != \"\", pod != \"\", container_id != \"\", job = \"kube-state-metrics\"})\nor sum by (container, pod, namespace, cluster, microsoft_resourceid) (kube_pod_init_container_info{container != \"\", pod != \"\", container_id != \"\", job = \"kube-state-metrics\"})\n)\n* on (pod, namespace, cluster, microsoft_resourceid) group_left (node, created_by_name, created_by_kind)\n(\nmax by (node, created_by_name, created_by_kind, pod, namespace, cluster, microsoft_resourceid) (\n\tkube_pod_info{pod != \"\", job = \"kube-state-metrics\"}\n)\n)\n)\n\n)"
},
{
"record": "ux:controller_container_count:sum",
"expression": "sum by (node, created_by_name, created_by_kind, namespace, cluster, microsoft_resourceid) (\nux:pod_container_count:sum\n)"
},
{
"record": "ux:pod_container_restarts:max",
"expression": "max by (node, created_by_name, created_by_kind, namespace, cluster, pod, microsoft_resourceid) (\n(\n(\nmax by (container, pod, namespace, cluster, microsoft_resourceid) (kube_pod_container_status_restarts_total{container != \"\", pod != \"\", job = \"kube-state-metrics\"})\nor sum by (container, pod, namespace, cluster, microsoft_resourceid) (kube_pod_init_container_status_restarts_total{container != \"\", pod != \"\", job = \"kube-state-metrics\"})\n)\n* on (pod, namespace, cluster, microsoft_resourceid) group_left (node, created_by_name, created_by_kind)\n(\nmax by (node, created_by_name, created_by_kind, pod, namespace, cluster, microsoft_resourceid) (\n\tkube_pod_info{pod != \"\", job = \"kube-state-metrics\"}\n)\n)\n)\n\n)"
},
{
"record": "ux:controller_container_restarts:max",
"expression": "max by (node, created_by_name, created_by_kind, namespace, cluster, microsoft_resourceid) (\nux:pod_container_restarts:max\n)"
},
{
"record": "ux:pod_resource_limit:sum",
"expression": "(sum by (cluster, pod, namespace, resource, microsoft_resourceid) (\n(\n\tmax by (cluster, microsoft_resourceid, pod, container, namespace, resource)\n\t (kube_pod_container_resource_limits{container != \"\", pod != \"\", job = \"kube-state-metrics\"})\n)\n)unless (count by (pod, namespace, cluster, resource, microsoft_resourceid)\n\t(kube_pod_container_resource_limits{container != \"\", pod != \"\", job = \"kube-state-metrics\"})\n!= on (pod, namespace, cluster, microsoft_resourceid) group_left()\n sum by (pod, namespace, cluster, microsoft_resourceid)\n (kube_pod_container_info{container != \"\", pod != \"\", job = \"kube-state-metrics\"}) \n)\n\n)* on (namespace, pod, cluster, microsoft_resourceid) group_left (node, created_by_kind, created_by_name)\n(\n\tkube_pod_info{pod != \"\", job = \"kube-state-metrics\"}\n)"
},
{
"record": "ux:controller_resource_limit:sum",
"expression": "sum by (cluster, namespace, created_by_name, created_by_kind, node, resource, microsoft_resourceid) (\nux:pod_resource_limit:sum\n)"
},
{
"record": "ux:controller_pod_phase_count:sum",
"expression": "sum by (cluster, phase, node, created_by_kind, created_by_name, namespace, microsoft_resourceid) ( (\n(kube_pod_status_phase{job=\"kube-state-metrics\",pod!=\"\"})\n or (label_replace((count(kube_pod_deletion_timestamp{job=\"kube-state-metrics\",pod!=\"\"}) by (namespace, pod, cluster, microsoft_resourceid) * count(kube_pod_status_reason{reason=\"NodeLost\", job=\"kube-state-metrics\"} == 0) by (namespace, pod, cluster, microsoft_resourceid)), \"phase\", \"terminating\", \"\", \"\"))) * on (pod, namespace, cluster, microsoft_resourceid) group_left (node, created_by_name, created_by_kind)\n(\nmax by (node, created_by_name, created_by_kind, pod, namespace, cluster, microsoft_resourceid) (\nkube_pod_info{job=\"kube-state-metrics\",pod!=\"\"}\n)\n)\n)"
},
{
"record": "ux:cluster_pod_phase_count:sum",
"expression": "sum by (cluster, phase, node, namespace, microsoft_resourceid) (\nux:controller_pod_phase_count:sum\n)"
},
{
"record": "ux:node_cpu_usage:sum_irate",
"expression": "sum by (instance, cluster, microsoft_resourceid) (\n(1 - irate(node_cpu_seconds_total{job=\"node\", mode=\"idle\"}[5m]))\n)"
},
{
"record": "ux:node_memory_usage:sum",
"expression": "sum by (instance, cluster, microsoft_resourceid) ((\nnode_memory_MemTotal_bytes{job = \"node\"}\n- node_memory_MemFree_bytes{job = \"node\"} \n- node_memory_Cached_bytes{job = \"node\"}\n- node_memory_Buffers_bytes{job = \"node\"}\n))"
},
{
"record": "ux:node_network_receive_drop_total:sum_irate",
"expression": "sum by (instance, cluster, microsoft_resourceid) (irate(node_network_receive_drop_total{job=\"node\", device!=\"lo\"}[5m]))"
},
{
"record": "ux:node_network_transmit_drop_total:sum_irate",
"expression": "sum by (instance, cluster, microsoft_resourceid) (irate(node_network_transmit_drop_total{job=\"node\", device!=\"lo\"}[5m]))"
}
]
},
"apiVersion": "2023-03-01",
"tags": "[parameters('tagsForAllResources')]"
},
{
"location": "centralus",
"name": "NodeRecordingRulesRuleGroup-Win-aks-cluster-1",
"type": "Microsoft.AlertsManagement/prometheusRuleGroups",
"properties": {
"description": "Node Recording Rules RuleGroup for Windows",
"enabled": false,
"scopes": [
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/Microsoft.monitor/accounts/defaultazuremonitorworkspace-cus",
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/Microsoft.ContainerService/managedClusters/aks-cluster-1"
],
"interval": "PT1M",
"clusterName": "aks-cluster-1",
"rules": [
{
"record": "node:windows_node:sum",
"expression": "count (windows_system_system_up_time{job=\"windows-exporter\"})"
},
{
"record": "node:windows_node_num_cpu:sum",
"expression": "count by (instance) (sum by (instance, core) (windows_cpu_time_total{job=\"windows-exporter\"}))"
},
{
"record": ":windows_node_cpu_utilisation:avg5m",
"expression": "1 - avg(rate(windows_cpu_time_total{job=\"windows-exporter\",mode=\"idle\"}[5m]))"
},
{
"record": "node:windows_node_cpu_utilisation:avg5m",
"expression": "1 - avg by (instance) (rate(windows_cpu_time_total{job=\"windows-exporter\",mode=\"idle\"}[5m]))"
},
{
"record": ":windows_node_memory_utilisation:",
"expression": "1 -sum(windows_memory_available_bytes{job=\"windows-exporter\"})/sum(windows_os_visible_memory_bytes{job=\"windows-exporter\"})"
},
{
"record": ":windows_node_memory_MemFreeCached_bytes:sum",
"expression": "sum(windows_memory_available_bytes{job=\"windows-exporter\"} + windows_memory_cache_bytes{job=\"windows-exporter\"})"
},
{
"record": "node:windows_node_memory_totalCached_bytes:sum",
"expression": "(windows_memory_cache_bytes{job=\"windows-exporter\"} + windows_memory_modified_page_list_bytes{job=\"windows-exporter\"} + windows_memory_standby_cache_core_bytes{job=\"windows-exporter\"} + windows_memory_standby_cache_normal_priority_bytes{job=\"windows-exporter\"} + windows_memory_standby_cache_reserve_bytes{job=\"windows-exporter\"})"
},
{
"record": ":windows_node_memory_MemTotal_bytes:sum",
"expression": "sum(windows_os_visible_memory_bytes{job=\"windows-exporter\"})"
},
{
"record": "node:windows_node_memory_bytes_available:sum",
"expression": "sum by (instance) ((windows_memory_available_bytes{job=\"windows-exporter\"}))"
},
{
"record": "node:windows_node_memory_bytes_total:sum",
"expression": "sum by (instance) (windows_os_visible_memory_bytes{job=\"windows-exporter\"})"
},
{
"record": "node:windows_node_memory_utilisation:ratio",
"expression": "(node:windows_node_memory_bytes_total:sum - node:windows_node_memory_bytes_available:sum) / scalar(sum(node:windows_node_memory_bytes_total:sum))"
},
{
"record": "node:windows_node_memory_utilisation:",
"expression": "1 - (node:windows_node_memory_bytes_available:sum / node:windows_node_memory_bytes_total:sum)"
},
{
"record": "node:windows_node_memory_swap_io_pages:irate",
"expression": "irate(windows_memory_swap_page_operations_total{job=\"windows-exporter\"}[5m])"
},
{
"record": ":windows_node_disk_utilisation:avg_irate",
"expression": "avg(irate(windows_logical_disk_read_seconds_total{job=\"windows-exporter\"}[5m]) + irate(windows_logical_disk_write_seconds_total{job=\"windows-exporter\"}[5m]))"
},
{
"record": "node:windows_node_disk_utilisation:avg_irate",
"expression": "avg by (instance) ((irate(windows_logical_disk_read_seconds_total{job=\"windows-exporter\"}[5m]) + irate(windows_logical_disk_write_seconds_total{job=\"windows-exporter\"}[5m])))"
}
]
},
"apiVersion": "2023-03-01",
"tags": "[parameters('tagsForAllResources')]"
},
{
"location": "centralus",
"name": "NodeAndKubernetesRecordingRulesRuleGroup-Win-aks-cluster-1",
"type": "Microsoft.AlertsManagement/prometheusRuleGroups",
"properties": {
"description": "Node and Kubernetes Recording Rules RuleGroup for Windows",
"enabled": false,
"scopes": [
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/Microsoft.monitor/accounts/defaultazuremonitorworkspace-cus",
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/Microsoft.ContainerService/managedClusters/aks-cluster-1"
],
"interval": "PT1M",
"clusterName": "aks-cluster-1",
"rules": [
{
"record": "node:windows_node_filesystem_usage:",
"expression": "max by (instance,volume)((windows_logical_disk_size_bytes{job=\"windows-exporter\"} - windows_logical_disk_free_bytes{job=\"windows-exporter\"}) / windows_logical_disk_size_bytes{job=\"windows-exporter\"})"
},
{
"record": "node:windows_node_filesystem_avail:",
"expression": "max by (instance, volume) (windows_logical_disk_free_bytes{job=\"windows-exporter\"} / windows_logical_disk_size_bytes{job=\"windows-exporter\"})"
},
{
"record": ":windows_node_net_utilisation:sum_irate",
"expression": "sum(irate(windows_net_bytes_total{job=\"windows-exporter\"}[5m]))"
},
{
"record": "node:windows_node_net_utilisation:sum_irate",
"expression": "sum by (instance) ((irate(windows_net_bytes_total{job=\"windows-exporter\"}[5m])))"
},
{
"record": ":windows_node_net_saturation:sum_irate",
"expression": "sum(irate(windows_net_packets_received_discarded_total{job=\"windows-exporter\"}[5m])) + sum(irate(windows_net_packets_outbound_discarded_total{job=\"windows-exporter\"}[5m]))"
},
{
"record": "node:windows_node_net_saturation:sum_irate",
"expression": "sum by (instance) ((irate(windows_net_packets_received_discarded_total{job=\"windows-exporter\"}[5m]) + irate(windows_net_packets_outbound_discarded_total{job=\"windows-exporter\"}[5m])))"
},
{
"record": "windows_pod_container_available",
"expression": "windows_container_available{job=\"windows-exporter\", container_id != \"\"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace)"
},
{
"record": "windows_container_total_runtime",
"expression": "windows_container_cpu_usage_seconds_total{job=\"windows-exporter\", container_id != \"\"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace)"
},
{
"record": "windows_container_memory_usage",
"expression": "windows_container_memory_usage_commit_bytes{job=\"windows-exporter\", container_id != \"\"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace)"
},
{
"record": "windows_container_private_working_set_usage",
"expression": "windows_container_memory_usage_private_working_set_bytes{job=\"windows-exporter\", container_id != \"\"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace)"
},
{
"record": "windows_container_network_received_bytes_total",
"expression": "windows_container_network_receive_bytes_total{job=\"windows-exporter\", container_id != \"\"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace)"
},
{
"record": "windows_container_network_transmitted_bytes_total",
"expression": "windows_container_network_transmit_bytes_total{job=\"windows-exporter\", container_id != \"\"} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job=\"kube-state-metrics\", container_id != \"\"}) by(container, container_id, pod, namespace)"
},
{
"record": "kube_pod_windows_container_resource_memory_request",
"expression": "max by (namespace, pod, container) (kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"}) * on(container,pod,namespace) (windows_pod_container_available)"
},
{
"record": "kube_pod_windows_container_resource_memory_limit",
"expression": "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on(container,pod,namespace) (windows_pod_container_available)"
},
{
"record": "kube_pod_windows_container_resource_cpu_cores_request",
"expression": "max by (namespace, pod, container) ( kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"}) * on(container,pod,namespace) (windows_pod_container_available)"
},
{
"record": "kube_pod_windows_container_resource_cpu_cores_limit",
"expression": "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on(container,pod,namespace) (windows_pod_container_available)"
},
{
"record": "namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate",
"expression": "sum by (namespace, pod, container) (rate(windows_container_total_runtime{}[5m]))"
}
]
},
"apiVersion": "2023-03-01",
"tags": "[parameters('tagsForAllResources')]"
},
{
"location": "centralus",
"name": "UXRecordingRulesRuleGroup-Win - aks-cluster-1",
"type": "Microsoft.AlertsManagement/prometheusRuleGroups",
"properties": {
"description": "UX Recording Rules for Windows",
"enabled": false,
"scopes": [
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/Microsoft.monitor/accounts/defaultazuremonitorworkspace-cus",
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/Microsoft.ContainerService/managedClusters/aks-cluster-1"
],
"interval": "PT1M",
"clusterName": "aks-cluster-1",
"rules": [
{
"record": "ux:pod_cpu_usage_windows:sum_irate",
"expression": "sum by (cluster, pod, namespace, node, created_by_kind, created_by_name, microsoft_resourceid) (\n\t(\n\t\tmax by (instance, container_id, cluster, microsoft_resourceid) (\n\t\t\tirate(windows_container_cpu_usage_seconds_total{ container_id != \"\", job = \"windows-exporter\"}[5m])\n\t\t) * on (container_id, cluster, microsoft_resourceid) group_left (container, pod, namespace) (\n\t\t\tmax by (container, container_id, pod, namespace, cluster, microsoft_resourceid) (\n\t\t\t\tkube_pod_container_info{container != \"\", pod != \"\", container_id != \"\", job = \"kube-state-metrics\"}\n\t\t\t)\n\t\t)\n\t) * on (pod, namespace, cluster, microsoft_resourceid) group_left (node, created_by_name, created_by_kind)\n\t(\n\t\tmax by (node, created_by_name, created_by_kind, pod, namespace, cluster, microsoft_resourceid) (\n\t\t kube_pod_info{ pod != \"\", job = \"kube-state-metrics\"}\n\t\t)\n\t)\n)"
},
{
"record": "ux:controller_cpu_usage_windows:sum_irate",
"expression": "sum by (namespace, node, cluster, created_by_name, created_by_kind, microsoft_resourceid) (\nux:pod_cpu_usage_windows:sum_irate\n)\n"
},
{
"record": "ux:pod_workingset_memory_windows:sum",
"expression": "sum by (cluster, pod, namespace, node, created_by_kind, created_by_name, microsoft_resourceid) (\n\t(\n\t\tmax by (instance, container_id, cluster, microsoft_resourceid) (\n\t\t\twindows_container_memory_usage_private_working_set_bytes{ container_id != \"\", job = \"windows-exporter\"}\n\t\t) * on (container_id, cluster, microsoft_resourceid) group_left (container, pod, namespace) (\n\t\t\tmax by (container, container_id, pod, namespace, cluster, microsoft_resourceid) (\n\t\t\t\tkube_pod_container_info{container != \"\", pod != \"\", container_id != \"\", job = \"kube-state-metrics\"}\n\t\t\t)\n\t\t)\n\t) * on (pod, namespace, cluster, microsoft_resourceid) group_left (node, created_by_name, created_by_kind)\n\t(\n\t\tmax by (node, created_by_name, created_by_kind, pod, namespace, cluster, microsoft_resourceid) (\n\t\t kube_pod_info{ pod != \"\", job = \"kube-state-metrics\"}\n\t\t)\n\t)\n)"
},
{
"record": "ux:controller_workingset_memory_windows:sum",
"expression": "sum by (namespace, node, cluster, created_by_name, created_by_kind, microsoft_resourceid) (\nux:pod_workingset_memory_windows:sum\n)"
},
{
"record": "ux:node_cpu_usage_windows:sum_irate",
"expression": "sum by (instance, cluster, microsoft_resourceid) (\n(1 - irate(windows_cpu_time_total{job=\"windows-exporter\", mode=\"idle\"}[5m]))\n)"
},
{
"record": "ux:node_memory_usage_windows:sum",
"expression": "sum by (instance, cluster, microsoft_resourceid) ((\nwindows_os_visible_memory_bytes{job = \"windows-exporter\"}\n- windows_memory_available_bytes{job = \"windows-exporter\"}\n))"
},
{
"record": "ux:node_network_packets_received_drop_total_windows:sum_irate",
"expression": "sum by (instance, cluster, microsoft_resourceid) (irate(windows_net_packets_received_discarded_total{job=\"windows-exporter\", device!=\"lo\"}[5m]))"
},
{
"record": "ux:node_network_packets_outbound_drop_total_windows:sum_irate",
"expression": "sum by (instance, cluster, microsoft_resourceid) (irate(windows_net_packets_outbound_discarded_total{job=\"windows-exporter\", device!=\"lo\"}[5m]))"
}
]
},
"apiVersion": "2023-03-01",
"tags": "[parameters('tagsForAllResources')]"
}
]
}
},
"dependsOn": [
"[concat('Microsoft.ContainerService/managedClusters/', parameters('resourceName'))]",
"Microsoft.Resources/deployments/CreateAzureMonitorWorkspace-20260507163212-6"
]
},
{
"type": "Microsoft.Resources/deployments",
"apiVersion": "2019-05-01",
"name": "CreateAzureMonitorWorkspace-20260507163212-6",
"subscriptionId": "c97afaf5-14ae-40aa-ad2b-3f39120dc8ba",
"resourceGroup": "AKS_Course_RG",
"dependsOn": [
"Microsoft.Resources/deployments/CreatePromDCE-20260507163212-24"
],
"properties": {
"mode": "Incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"resources": [
{
"type": "microsoft.monitor/accounts",
"name": "defaultazuremonitorworkspace-cus",
"location": "centralus",
"properties": {},
"apiVersion": "2023-04-03",
"tags": "[parameters('tagsForAllResources')]"
}
]
}
}
},
{
"type": "Microsoft.Resources/deployments",
"apiVersion": "2019-05-01",
"subscriptionId": "c97afaf5-14ae-40aa-ad2b-3f39120dc8ba",
"resourceGroup": "AKS_Course_RG",
"name": "CreateUserAssignedIdentity-20260507163212-78",
"properties": {
"mode": "Incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"resources": [
{
"type": "Microsoft.ManagedIdentity/userAssignedIdentities",
"name": "aks-cluster-1-uami",
"location": "centralus",
"properties": {},
"apiVersion": "2018-11-30"
}
],
"outputs": {
"principalId": {
"type": "string",
"value": "[reference(resourceId('Microsoft.ManagedIdentity/userAssignedIdentities', 'aks-cluster-1-uami'), '2018-11-30', 'Full').properties.principalId]"
}
}
},
"expressionEvaluationOptions": {
"scope": "inner"
}
}
},
{
"type": "Microsoft.Resources/deployments",
"apiVersion": "2019-05-01",
"name": "AddNetworkContributorRoleAssignmentToVirtualNe-20260507163212-44",
"subscriptionId": "c97afaf5-14ae-40aa-ad2b-3f39120dc8ba",
"resourceGroup": "AKS_Course_RG",
"dependsOn": [
"Microsoft.Resources/deployments/CreateUserAssignedIdentity-20260507163212-78",
"VnetDeployment-c3ce6448-cb07-f66c-a8ec-b7b242b56e17"
],
"properties": {
"mode": "Incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"resources": [
{
"type": "Microsoft.Authorization/roleAssignments",
"apiVersion": "2018-09-01-preview",
"name": "7c9bb429-39c2-26d1-5734-766272c31ff0",
"properties": {
"roleDefinitionId": "[concat('/subscriptions/', subscription().subscriptionId, '/providers/Microsoft.Authorization/roleDefinitions/', '4d97b98b-1d4f-4787-a291-c67834d212e7')]",
"principalId": "[reference('CreateUserAssignedIdentity-20260507163212-78').outputs.principalId.value]",
"principalType": "ServicePrincipal"
},
"scope": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/Microsoft.Network/virtualNetworks/AKS_Course_RG-vnet"
}
]
}
}
},
{
"name": "VnetDeployment-c3ce6448-cb07-f66c-a8ec-b7b242b56e17",
"type": "Microsoft.Resources/deployments",
"apiVersion": "2019-05-01",
"properties": {
"mode": "Incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"resources": [
{
"apiVersion": "2024-10-01",
"name": "AKS_Course_RG-vnet",
"type": "Microsoft.Network/virtualNetworks",
"location": "centralus",
"properties": {
"addressSpace": {
"addressPrefixes": [
"10.224.0.0/12"
]
},
"subnets": [
{
"name": "default",
"id": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/Microsoft.Network/virtualNetworks/AKS_Course_RG-vnet/subnets/default",
"properties": {
"addressPrefix": "10.224.0.0/16",
"serviceEndpoints": [
{
"service": "Microsoft.ContainerRegistry"
}
]
}
}
]
},
"tags": "[parameters('tagsForAllResources')]"
}
]
}
}
},
{
"type": "Microsoft.Resources/deployments",
"apiVersion": "2019-05-01",
"name": "ClusterOnboardingPut-055fd7b8-12d4-3ad6-c33e-0cd96a75a641",
"dependsOn": [
"Microsoft.Resources/deployments/CreatePromDCRA-20260507163212-8",
"Microsoft.Resources/deployments/CreatePromRecordingRules-20260507163212-32"
],
"properties": {
"mode": "Incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"resources": [
{
"apiVersion": "2025-05-01",
"type": "microsoft.containerservice/managedclusters",
"location": "[parameters('location')]",
"sku": "[parameters('clusterSku')]",
"name": "aks-cluster-1",
"properties": {
"kubernetesVersion": "[parameters('kubernetesVersion')]",
"enableRBAC": "[parameters('enableRBAC')]",
"dnsPrefix": "[parameters('dnsPrefix')]",
"nodeResourceGroup": "[parameters('nodeResourceGroup')]",
"disableLocalAccounts": "[parameters('disableLocalAccounts')]",
"aadProfile": "[if(parameters('enableAadProfile'), variables('defaultAadProfile'), null())]",
"autoUpgradeProfile": {
"upgradeChannel": "[parameters('upgradeChannel')]",
"nodeOSUpgradeChannel": "[parameters('nodeOSUpgradeChannel')]"
},
"agentPoolProfiles": [
{
"name": "agentpool",
"osDiskSizeGB": "[parameters('osDiskSizeGB')]",
"count": 2,
"enableAutoScaling": true,
"minCount": 2,
"maxCount": 5,
"vmSize": "Standard_D2als_v7",
"osType": "Linux",
"osSKU": "Ubuntu",
"type": "VirtualMachineScaleSets",
"mode": "System",
"maxPods": 110,
"availabilityZones": [
"1",
"2",
"3"
],
"nodeLabels": {},
"nodeTaints": [],
"enableNodePublicIP": false,
"tags": {
"Managed By": "Manually by Solomon"
},
"vnetSubnetID": "[parameters('vnetSubnetID')]"
}
],
"apiServerAccessProfile": "[if(parameters('isPrivateClusterSupported'), variables('defaultApiServerAccessProfile'), null())]",
"addonProfiles": {
"azurepolicy": "[if(parameters('isAzurePolicySupported'), variables('defaultAzurePolicy'), null())]",
"azureKeyvaultSecretsProvider": "[if(parameters('isSecretStoreCSIDDriverSupported'), variables('defaultSecrectStoreProvider'), null())]"
},
"diskEncryptionSetID": "[if(parameters('enableDiskEncryptionSetID'), parameters('diskEncryptionSetID'), null())]",
"networkProfile": {
"loadBalancerSku": "[parameters('loadBalancerSku')]",
"networkPlugin": "[parameters('networkPlugin')]",
"networkPluginMode": "[parameters('networkPluginMode')]",
"networkDataplane": "[parameters('networkDataplane')]",
"networkPolicy": "[parameters('networkPolicy')]",
"serviceCidr": "[parameters('serviceCidr')]",
"dnsServiceIP": "[parameters('dnsServiceIP')]",
"advancedNetworking": {
"enabled": "[or(parameters('enableContainerNetworkObservability'), parameters('enableContainerNetworkSecurity'))]",
"observability": {
"enabled": "[parameters('enableContainerNetworkObservability')]"
},
"security": {
"enabled": "[parameters('enableContainerNetworkSecurity')]"
}
}
},
"supportPlan": "[parameters('supportPlan')]",
"securityProfile": {
"imageCleaner": {
"enabled": "[parameters('isImageCleanerEnabled')]",
"intervalHours": "[parameters('imageCleanerIntervalHours')]"
},
"workloadIdentity": {
"enabled": "[parameters('enableWorkloadIdentity')]"
}
},
"oidcIssuerProfile": {
"enabled": "[parameters('enableOIDC')]"
},
"nodeProvisioningProfile": {
"mode": "[parameters('nodeAutoProvisioningMode')]"
},
"azureMonitorProfile": {
"metrics": {
"enabled": true,
"kubeStateMetrics": {
"metricLabelsAllowlist": "",
"metricAnnotationsAllowList": ""
}
}
}
},
"identity": "[parameters('clusterIdentity')]",
"extendedLocation": "[if(parameters('isLocationEdgeZone'), parameters('edgeZone'), null())]",
"tags": "[parameters('clusterTags')]"
}
]
}
},
"subscriptionId": "c97afaf5-14ae-40aa-ad2b-3f39120dc8ba",
"resourceGroup": "AKS_Course_RG"
},
{
"name": "InsightsActionGroupDepl-bf6dc3bc-ff33-0414-1920-fc5500d9a8e7",
"type": "Microsoft.Resources/deployments",
"apiVersion": "2021-04-01",
"properties": {
"mode": "Incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"resources": [
{
"type": "microsoft.insights/actionGroups",
"apiVersion": "2022-06-01",
"name": "RecommendedAlertRules-AG-d24114",
"location": "Global",
"properties": {
"groupShortName": "alertd24114",
"enabled": true,
"emailReceivers": [
{
"name": "Email_-EmailAction-",
"emailAddress": "dupo24@msn.com",
"useCommonAlertSchema": true
}
],
"armRoleReceivers": [],
"azureAppPushReceivers": []
},
"tags": "[parameters('tagsForAllResources')]"
}
]
}
}
},
{
"name": "InsightsMetricAlertsDepl-eaf0fe22-2162-4e10-fb6b-a84efaf1c87b",
"type": "Microsoft.Resources/deployments",
"apiVersion": "2021-04-01",
"dependsOn": [
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/Microsoft.ContainerService/managedClusters/aks-cluster-1",
"[concat('Microsoft.Resources/deployments/', 'InsightsActionGroupDepl-bf6dc3bc-ff33-0414-1920-fc5500d9a8e7')]"
],
"properties": {
"mode": "Incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"resources": [
{
"type": "microsoft.insights/metricAlerts",
"apiVersion": "2018-03-01",
"name": "CPU Usage Percentage - aks-cluster-1",
"location": "Global",
"properties": {
"severity": 3,
"enabled": true,
"scopes": [
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/Microsoft.ContainerService/managedClusters/aks-cluster-1"
],
"evaluationFrequency": "PT5M",
"windowSize": "PT5M",
"criteria": {
"allOf": [
{
"name": "Metric1",
"metricName": "node_cpu_usage_percentage",
"metricNamespace": "Microsoft.ContainerService/managedClusters",
"operator": "GreaterThan",
"timeAggregation": "Average",
"criterionType": "StaticThresholdCriterion",
"threshold": 95
}
],
"odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria"
},
"targetResourceType": "Microsoft.ContainerService/managedClusters",
"actions": [
{
"actionGroupId": "[resourceId('Microsoft.Insights/ActionGroups', 'RecommendedAlertRules-AG-d24114')]",
"webhookProperties": {}
}
],
"azureAppPushReceivers": [],
"armRoleReceivers": []
},
"tags": "[parameters('tagsForAllResources')]"
},
{
"type": "microsoft.insights/metricAlerts",
"apiVersion": "2018-03-01",
"name": "Memory Working Set Percentage - aks-cluster-1",
"location": "Global",
"properties": {
"severity": 3,
"enabled": true,
"scopes": [
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/Microsoft.ContainerService/managedClusters/aks-cluster-1"
],
"evaluationFrequency": "PT5M",
"windowSize": "PT5M",
"criteria": {
"allOf": [
{
"name": "Metric1",
"metricName": "node_memory_working_set_percentage",
"metricNamespace": "Microsoft.ContainerService/managedClusters",
"operator": "GreaterThan",
"timeAggregation": "Average",
"criterionType": "StaticThresholdCriterion",
"threshold": 100
}
],
"odata.type": "Microsoft.Azure.Monitor.MultipleResourceMultipleMetricCriteria"
},
"targetResourceType": "Microsoft.ContainerService/managedClusters",
"actions": [
{
"actionGroupId": "[resourceId('Microsoft.Insights/ActionGroups', 'RecommendedAlertRules-AG-d24114')]",
"webhookProperties": {}
}
],
"azureAppPushReceivers": [],
"armRoleReceivers": []
},
"tags": "[parameters('tagsForAllResources')]"
}
]
}
}
},
{
"name": "PrometheusAlerts-faef20c0-db36-1cfc-7671-04e29155d346",
"type": "Microsoft.Resources/deployments",
"apiVersion": "2021-04-01",
"dependsOn": [
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/Microsoft.ContainerService/managedClusters/aks-cluster-1",
"[concat('Microsoft.Resources/deployments/', 'InsightsActionGroupDepl-bf6dc3bc-ff33-0414-1920-fc5500d9a8e7')]",
"Microsoft.Resources/deployments/CreateAzureMonitorWorkspace-20260507163212-6"
],
"properties": {
"mode": "Incremental",
"template": {
"$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"resources": [
{
"name": "Prometheus Recommended Cluster level Alerts - aks-cluster-1",
"type": "Microsoft.AlertsManagement/prometheusRuleGroups",
"apiVersion": "2023-03-01",
"location": "centralus",
"properties": {
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"description": "Cluster level Alert RuleGroup-RecommendedAlerts",
"scopes": [
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/Microsoft.ContainerService/managedClusters/aks-cluster-1",
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/microsoft.monitor/accounts/defaultazuremonitorworkspace-cus"
],
"clusterName": "aks-cluster-1",
"location": "centralus",
"rules": [
{
"alert": "KubeCPUQuotaOvercommit",
"enabled": true,
"expression": "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(cpu|requests.cpu)\"})) /sum(kube_node_status_allocatable{resource=\"cpu\", job=\"kube-state-metrics\"}) > 1.5",
"for": "PT5M",
"labels": {
"severity": "warning"
},
"annotations": {
"description": "Cluster {{ $labels.cluster}} has overcommitted CPU resource requests for Namespaces."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 3,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeMemoryQuotaOvercommit",
"enabled": true,
"expression": "sum(min without(resource) (kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=~\"(memory|requests.memory)\"})) /sum(kube_node_status_allocatable{resource=\"memory\", job=\"kube-state-metrics\"}) > 1.5",
"for": "PT5M",
"annotations": {
"description": "Cluster {{ $labels.cluster}} has overcommitted memory resource requests for Namespaces."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 3,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeContainerOOMKilledCount",
"enabled": true,
"expression": "sum by (cluster,container,controller,namespace)(kube_pod_container_status_last_terminated_reason{reason=\"OOMKilled\"} * on(cluster,namespace,pod) group_left(controller) label_replace(kube_pod_owner, \"controller\", \"$1\", \"owner_name\", \"(.*)\")) > 0",
"for": "PT5M",
"annotations": {
"description": "Number of OOM killed containers is greater than 0."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 4,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeClientErrors",
"enabled": true,
"expression": "(sum(rate(rest_client_requests_total{code=~\"5..\"}[5m])) by (cluster, instance, job, namespace) / sum(rate(rest_client_requests_total[5m])) by (cluster, instance, job, namespace)) > 0.01",
"for": "PT15M",
"annotations": {
"description": "Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 3,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubePersistentVolumeFillingUp",
"enabled": true,
"expression": "kubelet_volume_stats_available_bytes{job=\"kubelet\"}/kubelet_volume_stats_capacity_bytes{job=\"kubelet\"} < 0.15 and kubelet_volume_stats_used_bytes{job=\"kubelet\"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\"}[6h], 4 * 24 * 3600) < 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"} == 1",
"for": "PT60M",
"annotations": {
"description": "Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT15M"
},
"severity": 4,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubePersistentVolumeInodesFillingUp",
"enabled": true,
"expression": "kubelet_volume_stats_inodes_free{job=\"kubelet\"} / kubelet_volume_stats_inodes{job=\"kubelet\"} < 0.03",
"for": "PT15M",
"annotations": {
"description": "The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage }} free inodes."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 4,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubePersistentVolumeErrors",
"enabled": true,
"expression": "kube_persistentvolume_status_phase{phase=~\"Failed|Pending\",job=\"kube-state-metrics\"} > 0",
"for": "PT5M",
"annotations": {
"description": "The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 4,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeContainerWaiting",
"enabled": true,
"expression": "sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job=\"kube-state-metrics\"}) > 0",
"for": "PT60M",
"annotations": {
"description": "pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container {{ $labels.container}} has been in waiting state for longer than 1 hour."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 3,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeDaemonSetNotScheduled",
"enabled": true,
"expression": "kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"} - kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"} > 0",
"for": "PT15M",
"annotations": {
"description": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 3,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeDaemonSetMisScheduled",
"enabled": true,
"expression": "kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\"} > 0",
"for": "PT15M",
"annotations": {
"description": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 3,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeQuotaAlmostFull",
"enabled": true,
"expression": "kube_resourcequota{job=\"kube-state-metrics\", type=\"used\"} / ignoring(instance, job, type)(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"} > 0) > 0.9 < 1",
"for": "PT15M",
"annotations": {
"description": "{{ $value | humanizePercentage }} usage of {{ $labels.resource }} in namespace {{ $labels.namespace }} in {{ $labels.cluster}}."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 3,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
}
]
}
},
{
"name": "Prometheus Recommended Node level Alerts - aks-cluster-1",
"type": "Microsoft.AlertsManagement/prometheusRuleGroups",
"apiVersion": "2023-03-01",
"location": "centralus",
"properties": {
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"description": "Node level Alert RuleGroup-RecommendedAlerts",
"scopes": [
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/Microsoft.ContainerService/managedClusters/aks-cluster-1",
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/microsoft.monitor/accounts/defaultazuremonitorworkspace-cus"
],
"clusterName": "aks-cluster-1",
"location": "centralus",
"rules": [
{
"alert": "KubeNodeUnreachable",
"enabled": true,
"expression": "(kube_node_spec_taint{job=\"kube-state-metrics\",key=\"node.kubernetes.io/unreachable\",effect=\"NoSchedule\"} unless ignoring(key,value) kube_node_spec_taint{job=\"kube-state-metrics\",key=~\"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn\"}) == 1",
"for": "PT15M",
"annotations": {
"description": "{{ $labels.node }} in {{ $labels.cluster}} is unreachable and some workloads may be rescheduled."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 3,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeNodeReadinessFlapping",
"enabled": true,
"expression": "sum(changes(kube_node_status_condition{status=\"true\",condition=\"Ready\"}[15m])) by (cluster, node) > 2",
"for": "PT15M",
"annotations": {
"description": "The readiness status of node {{ $labels.node }} in {{ $labels.cluster}} has changed more than 2 times in the last 15 minutes."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 3,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
}
]
}
},
{
"name": "Prometheus Recommended Pod level Alerts - aks-cluster-1",
"type": "Microsoft.AlertsManagement/prometheusRuleGroups",
"apiVersion": "2023-03-01",
"location": "centralus",
"properties": {
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"description": "Pod level Alert RuleGroup-RecommendedAlerts",
"scopes": [
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/Microsoft.ContainerService/managedClusters/aks-cluster-1",
"/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourcegroups/AKS_Course_RG/providers/microsoft.monitor/accounts/defaultazuremonitorworkspace-cus"
],
"clusterName": "aks-cluster-1",
"location": "centralus",
"rules": [
{
"alert": "KubePVUsageHigh",
"enabled": true,
"expression": "avg by (namespace, controller, container, cluster)(((kubelet_volume_stats_used_bytes{job=\"kubelet\"} / on(namespace,cluster,pod,container) group_left kubelet_volume_stats_capacity_bytes{job=\"kubelet\"}) * on(namespace, pod, cluster) group_left(controller) label_replace(kube_pod_owner, \"controller\", \"$1\", \"owner_name\", \"(.*)\")) > .8)",
"for": "PT15M",
"annotations": {
"description": "Average PV usage on pod {{ $labels.pod }} in container {{ $labels.container }} is greater than 80%."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 3,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeDeploymentReplicasMismatch",
"enabled": true,
"expression": "( kube_deployment_spec_replicas{job=\"kube-state-metrics\"} > kube_deployment_status_replicas_available{job=\"kube-state-metrics\"}) and ( changes(kube_deployment_status_replicas_updated{job=\"kube-state-metrics\"}[10m]) == 0)",
"for": "PT15M",
"annotations": {
"description": "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} in {{ $labels.cluster}} replica mismatch."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT15M"
},
"severity": 4,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeStatefulSetReplicasMismatch",
"enabled": true,
"expression": "( kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\"} != kube_statefulset_status_replicas{job=\"kube-state-metrics\"}) and ( changes(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}[10m]) == 0)",
"for": "PT15M",
"annotations": {
"description": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} in {{ $labels.cluster}} replica mismatch."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 4,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeHpaReplicasMismatch",
"enabled": true,
"expression": "(kube_horizontalpodautoscaler_status_desired_replicas{job=\"kube-state-metrics\"} !=kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}) and(kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"} >kube_horizontalpodautoscaler_spec_min_replicas{job=\"kube-state-metrics\"}) and(kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"} <kube_horizontalpodautoscaler_spec_max_replicas{job=\"kube-state-metrics\"}) and changes(kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"}[15m]) == 0",
"for": "PT15M",
"annotations": {
"description": "Horizontal Pod Autoscaler in {{ $labels.cluster}} has not matched the desired number of replicas for longer than 15 minutes."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT15M"
},
"severity": 4,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeHpaMaxedOut",
"enabled": true,
"expression": "kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"} ==kube_horizontalpodautoscaler_spec_max_replicas{job=\"kube-state-metrics\"}",
"for": "PT15M",
"annotations": {
"description": "Horizontal Pod Autoscaler in {{ $labels.cluster}} has been running at max replicas for longer than 15 minutes."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT15M"
},
"severity": 4,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubePodCrashLooping",
"enabled": true,
"expression": "max_over_time(kube_pod_container_status_waiting_reason{reason=\"CrashLoopBackOff\", job=\"kube-state-metrics\"}[5m]) >= 1",
"for": "PT15M",
"annotations": {
"description": "{{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) in {{ $labels.cluster}} is in waiting state (reason: CrashLoopBackOff)."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 4,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeJobStale",
"enabled": true,
"expression": "sum by(namespace,cluster)(kube_job_spec_completions{job=\"kube-state-metrics\"}) - sum by(namespace,cluster)(kube_job_status_succeeded{job=\"kube-state-metrics\"}) > 0 ",
"for": "PT360M",
"annotations": {
"description": "Number of stale jobs older than six hours is greater than 0."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT15M"
},
"severity": 4,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubePodContainerRestart",
"enabled": true,
"expression": "sum by (namespace, controller, container, cluster)(increase(kube_pod_container_status_restarts_total{job=\"kube-state-metrics\"}[1h])* on(namespace, pod, cluster) group_left(controller) label_replace(kube_pod_owner, \"controller\", \"$1\", \"owner_name\", \"(.*)\")) > 0",
"for": "PT15M",
"annotations": {
"description": "Pod container restarted in last 1 hour."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 4,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubePodReadyStateLow",
"enabled": true,
"expression": "sum by (cluster,namespace,deployment)(kube_deployment_status_replicas_ready) / sum by (cluster,namespace,deployment)(kube_deployment_spec_replicas) <.8 or sum by (cluster,namespace,daemonset)(kube_daemonset_status_number_ready) / sum by (cluster,namespace,daemonset)(kube_daemonset_status_desired_number_scheduled) <.8 ",
"for": "PT5M",
"annotations": {
"description": "Ready state of pods is less than 80%."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT15M"
},
"severity": 4,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubePodFailedState",
"enabled": true,
"expression": "sum by (cluster, namespace, controller) (kube_pod_status_phase{phase=\"Failed\"} * on(namespace, pod, cluster) group_left(controller) label_replace(kube_pod_owner, \"controller\", \"$1\", \"owner_name\", \"(.*)\")) > 0",
"for": "PT5M",
"annotations": {
"description": "Number of pods in failed state is greater than 0."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT15M"
},
"severity": 4,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubePodNotReadyByController",
"enabled": true,
"expression": "sum by (namespace, controller, cluster) (max by(namespace, pod, cluster) (kube_pod_status_phase{job=\"kube-state-metrics\", phase=~\"Pending|Unknown\"} ) * on(namespace, pod, cluster) group_left(controller)label_replace(kube_pod_owner,\"controller\",\"$1\",\"owner_name\",\"(.*)\")) > 0",
"for": "PT15M",
"annotations": {
"description": "Pods owned by controller {{ $labels.controller }} in namespace {{ $labels.namespace }} in {{ $labels.cluster}} are not ready."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 3,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeStatefulSetGenerationMismatch",
"enabled": true,
"expression": "kube_statefulset_status_observed_generation{job=\"kube-state-metrics\"} != kube_statefulset_metadata_generation{job=\"kube-state-metrics\"}",
"for": "PT15M",
"annotations": {
"description": "StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 3,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeJobFailed",
"enabled": true,
"expression": "kube_job_failed{job=\"kube-state-metrics\"} > 0",
"for": "PT15M",
"annotations": {
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} in {{ $labels.cluster}} failed to complete."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 3,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeContainerAverageCPUHigh",
"enabled": true,
"expression": "sum (rate(container_cpu_usage_seconds_total{image!=\"\", container!=\"POD\"}[5m])) by (pod,cluster,container,namespace) / sum(container_spec_cpu_quota{image!=\"\", container!=\"POD\"}/container_spec_cpu_period{image!=\"\", container!=\"POD\"}) by (pod,cluster,container,namespace) > .95",
"for": "PT5M",
"annotations": {
"description": "Average CPU usage per container is greater than 95%."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT15M"
},
"severity": 4,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeContainerAverageMemoryHigh",
"enabled": true,
"expression": "avg by (namespace, controller, container, cluster)(((container_memory_working_set_bytes{container!=\"\", image!=\"\", container!=\"POD\"} / on(namespace,cluster,pod,container) group_left kube_pod_container_resource_limits{resource=\"memory\", node!=\"\"})*on(namespace, pod, cluster) group_left(controller) label_replace(kube_pod_owner, \"controller\", \"$1\", \"owner_name\", \"(.*)\")) > .95)",
"for": "PT10M",
"annotations": {
"description": "Average Memory usage per container is greater than 95%."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 4,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
},
{
"alert": "KubeletPodStartUpLatencyHigh",
"enabled": true,
"expression": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job=\"kubelet\"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job=\"kubelet\"} > 60",
"for": "PT10M",
"annotations": {
"description": "Kubelet Pod startup latency is too high."
},
"resolveConfiguration": {
"autoResolved": true,
"timeToResolve": "PT10M"
},
"severity": 4,
"actions": [
{
"actionGroupId": "/subscriptions/c97afaf5-14ae-40aa-ad2b-3f39120dc8ba/resourceGroups/AKS_Course_RG/providers/microsoft.insights/actiongroups/RecommendedAlertRules-AG-d24114",
"webhookProperties": {}
}
]
}
]
}
}
]
}
}
}
],
"outputs": {
"controlPlaneFQDN": {
"type": "string",
"value": "[reference(concat('Microsoft.ContainerService/managedClusters/', parameters('resourceName'))).fqdn]"
}
}
}