Ansible nagios-server role
<yambe:breadcrumb>Ansible_roles|Ansible roles</yambe:breadcrumb>
ansible nagios-server role
An Ansible nagios-server role, which configures a Nagios server that can monitor both public services and internal host details (via NRPE), can be created using the following steps:
Create roles/nagios-server folder
mkdir -p roles/nagios-server
Create roles/nagios-server/{files,handlers,tasks,templates} folders
mkdir -p roles/nagios-server/{files,handlers,tasks,templates}
Change working directory to roles/nagios-server folder
cd roles/nagios-server
Create files/commands.cfg file with following contents:
################################################################################
# files/commands.cfg
#
# Command definitions used by the nagios-server role. Each object is written
# with one directive per line; every command_line value is kept on a single
# line.
################################################################################

# 'notify-host-by-email' command definition
define command{
    command_name    notify-host-by-email
    command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" $CONTACTEMAIL$
    }

# 'notify-service-by-email' command definition
define command{
    command_name    notify-service-by-email
    command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$\n" | /bin/mail -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
    }

# 'check_nrpe' command definition
# Runs the NRPE command named by $ARG1$ on the monitored host.
define command{
    command_name    check_nrpe
    command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
    }

# This command checks to see if a host is "alive" by pinging it.
# The check must result in a 100% packet loss or 5 second (5000ms) round trip
# average time to produce a critical error.
# Note: Five ICMP echo packets are sent (determined by the '-p 5' argument)

# 'check-host-alive' command definition
define command{
    command_name    check-host-alive
    command_line    $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5
    }

# 'check_local_disk' command definition
define command{
    command_name    check_local_disk
    command_line    $USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
    }

# 'check_local_load' command definition
define command{
    command_name    check_local_load
    command_line    $USER1$/check_load -w $ARG1$ -c $ARG2$
    }

# 'check_local_procs' command definition
define command{
    command_name    check_local_procs
    command_line    $USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$
    }

# 'check_local_users' command definition
define command{
    command_name    check_local_users
    command_line    $USER1$/check_users -w $ARG1$ -c $ARG2$
    }

# 'check_local_swap' command definition
define command{
    command_name    check_local_swap
    command_line    $USER1$/check_swap -w $ARG1$ -c $ARG2$
    }

# 'check_local_mrtgtraf' command definition
define command{
    command_name    check_local_mrtgtraf
    command_line    $USER1$/check_mrtgtraf -F $ARG1$ -a $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$
    }

################################################################################
# NOTE: The following 'check_...' commands are used to monitor services on
#       both local and remote hosts.
################################################################################

# 'check_ftp' command definition
define command{
    command_name    check_ftp
    command_line    $USER1$/check_ftp -H $HOSTADDRESS$ $ARG1$
    }

# 'check_hpjd' command definition
define command{
    command_name    check_hpjd
    command_line    $USER1$/check_hpjd -H $HOSTADDRESS$ $ARG1$
    }

# 'check_snmp' command definition
define command{
    command_name    check_snmp
    command_line    $USER1$/check_snmp -H $HOSTADDRESS$ $ARG1$
    }

# 'check_http' command definition
define command{
    command_name    check_http
    command_line    $USER1$/check_http -I $HOSTADDRESS$ $ARG1$
    }

# 'check_ssh' command definition
define command{
    command_name    check_ssh
    command_line    $USER1$/check_ssh $ARG1$ $HOSTADDRESS$
    }

# 'check_dhcp' command definition
define command{
    command_name    check_dhcp
    command_line    $USER1$/check_dhcp $ARG1$
    }

# 'check_ping' command definition
define command{
    command_name    check_ping
    command_line    $USER1$/check_ping -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5
    }

# 'check_pop' command definition
define command{
    command_name    check_pop
    command_line    $USER1$/check_pop -H $HOSTADDRESS$ $ARG1$
    }

# 'check_imap' command definition
define command{
    command_name    check_imap
    command_line    $USER1$/check_imap -H $HOSTADDRESS$ $ARG1$
    }

# 'check_smtp' command definition
define command{
    command_name    check_smtp
    command_line    $USER1$/check_smtp -H $HOSTADDRESS$ $ARG1$
    }

# 'check_tcp' command definition
define command{
    command_name    check_tcp
    command_line    $USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$
    }

# 'check_udp' command definition
define command{
    command_name    check_udp
    command_line    $USER1$/check_udp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$
    }

# 'check_nt' command definition
define command{
    command_name    check_nt
    command_line    $USER1$/check_nt -H $HOSTADDRESS$ -p 12489 -v $ARG1$ $ARG2$
    }

# 'process-host-perfdata' command definition
# Appends one tab-separated record per host check to host-perfdata.out.
define command{
    command_name    process-host-perfdata
    command_line    /usr/bin/printf "%b" "$LASTHOSTCHECK$\t$HOSTNAME$\t$HOSTSTATE$\t$HOSTATTEMPT$\t$HOSTSTATETYPE$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$\n" >> /var/log/nagios/host-perfdata.out
    }

# 'process-service-perfdata' command definition
# Appends one tab-separated record per service check to service-perfdata.out.
define command{
    command_name    process-service-perfdata
    command_line    /usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /var/log/nagios/service-perfdata.out
    }
Create files/index.html file with following contents:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
    <!-- Immediately (0 seconds) redirect the browser to the relative URL "nagios". -->
    <meta http-equiv="Refresh" content="0; URL=nagios" />
  </head>
  <body>
  </body>
</html>
Create an empty files/localhost.cfg file (the tasks below copy it over /etc/nagios/objects/localhost.cfg)
Create handlers/main.yaml file with the following contents:
---
# Handlers for the nagios-server role.
# Tasks trigger this handler by listing its name ("restart nagios") under
# `notify`, so the name must stay exactly as written.
- name: restart nagios
  service:
    name: nagios
    state: restarted
Create tasks/main.yaml with following contents:
---
# Tasks for the nagios-server role: install Nagios and its plugins, deploy the
# contact/command/client configuration, validate it and start the services.
# Every task that changes a Nagios configuration file notifies the
# "restart nagios" handler so the running daemon picks up the change.

# EPEL is installed in its own task so the repository is available before the
# package installation task below runs.
- name: Install epel-release
  yum: name={{item}} state=present
  with_items:
    - epel-release

- name: Install the necessary packages
  yum: name={{item}} state=present
  with_items:
    - nagios
    - nagios-devel
    - nagios-lcgdm
    - nagios-plugins-all
    - nagios-plugins-fts
    - nagios-plugins-lcgdm
    - pnp4nagios
    - nagios-plugins-nrpe
    - nrpe

- name: Configure nagios to send alerts over email
  template: src=contacts.j2 dest='/etc/nagios/objects/contacts.cfg'
  notify:
    - restart nagios

- name: Create server directory for client information
  file: path='/etc/nagios/servers' state=directory mode=0755 owner=root group=nagios

- name: To maintain the clients through nagios
  lineinfile: dest='/etc/nagios/nagios.cfg' insertafter='cfg_dir=/etc/nagios/conf.d' line='cfg_dir=/etc/nagios/servers'
  notify:
    - restart nagios

- name: Add service information to client machine
  template: src=client_info.j2 dest='/etc/nagios/servers/all.cfg'
  notify:
    - restart nagios

- name: Replace existing localhost.cfg file with empty file
  copy: src=localhost.cfg dest="/etc/nagios/objects/localhost.cfg" owner=root group=nagios mode=0644
  notify:
    - restart nagios

- name: Configure various custom command in commands.cfg
  copy: src=commands.cfg dest=/etc/nagios/objects/commands.cfg mode=0664
  notify:
    - restart nagios

# Mode 0664 matches the mode the copy task above gives commands.cfg, so the two
# tasks no longer fight over the mode on every run; plain configuration files
# also do not need the execute bit.
- name: Set proper permissions on client files
  file: path={{item}} mode=0664 owner=root group=nagios
  with_items:
    - "/etc/nagios/objects/contacts.cfg"
    - "/etc/nagios/servers/all.cfg"
    - "/etc/nagios/objects/commands.cfg"

# Read-only validation: the play fails here if the configuration is invalid.
# No shell features are needed, and the check never changes the host, so it is
# run with `command` and never reported as "changed".
- name: Verify the setup of nagios
  command: nagios -v /etc/nagios/nagios.cfg
  changed_when: false

- name: Start the nagios service
  service: name={{item}} state=started enabled=yes
  with_items:
    - nagios
    - nrpe
    - httpd

- name: Setup automatic redirect to /nagios
  copy: src=index.html dest=/var/www/html/index.html owner=root group=root mode=0444
Create templates/client_info.j2 with following contents:
{# templates/client_info.j2

   Renders one Nagios host definition per entry of nagios_client_list and one
   service definition per recognised name in that entry's `service` list.
   Each entry must provide: hostname, ip, contactgroup and service (a list).
   Service names that match none of the branches below produce no output.
#}
###################################################remote service check using nagios##################################################
{% for client in nagios_client_list %}

# Define the monitored host
define host{
    use                     linux-server    ; Name of host template to use
                                            ; This host definition will inherit all variables that are defined
                                            ; in (or inherited by) the linux-server host template definition.
    host_name               {{client.hostname}}
    address                 {{client.ip}}
    }

{% for service in client.service %}
{# ---- checks run from the Nagios server against the host's public services ---- #}
{% if service == "ping" %}
# Define a service to "ping" the host
define service{
    use                     local-service   ; Name of service template to use
    host_name               {{client.hostname}}
    service_description     {{service}}
    check_command           check_ping!100.0,20%!500.0,60%
    }

{% elif service == "ssh" %}
# Define a service to check SSH on the host
define service{
    use                     local-service   ; Name of service template to use
    host_name               {{client.hostname}}
    service_description     {{service}}
    check_command           check_ssh
    notifications_enabled   1
    contact_groups          {{client.contactgroup}}
    }

{% elif service == "http" %}
# Define a service to check HTTP on the host
define service{
    use                     local-service   ; Name of service template to use
    host_name               {{client.hostname}}
    service_description     {{service}}
    check_command           check_http
    notifications_enabled   1
    contact_groups          {{client.contactgroup}}
    }

{% elif service == "https" %}
# Define a service to check HTTPS on the host (TCP connect to port 443 with -S)
define service{
    use                     generic-service ; Inherit default values from a template
    host_name               {{client.hostname}}
    service_description     {{service}}
    check_command           check_tcp!443!-S
    notifications_enabled   1
    contact_groups          {{client.contactgroup}}
    }

{% elif service == "ftp" %}
# Define a service to check FTP on the host
define service{
    use                     generic-service ; Inherit default values from a template
    host_name               {{client.hostname}}
    service_description     {{service}}
    check_command           check_ftp
    notifications_enabled   1
    contact_groups          {{client.contactgroup}}
    }

{% elif service == "smtp" %}
# Define a service to check SMTP on the host
define service{
    use                     generic-service ; Inherit default values from a template
    host_name               {{client.hostname}}
    service_description     {{service}}
    check_command           check_smtp
    notifications_enabled   1
    contact_groups          {{client.contactgroup}}
    }

{% elif service == "imap" %}
# Define a service to check IMAP on the host
define service{
    use                     generic-service ; Inherit default values from a template
    host_name               {{client.hostname}}
    service_description     {{service}}
    check_command           check_imap
    notifications_enabled   1
    contact_groups          {{client.contactgroup}}
    }

{% elif service == "imaps" %}
# Define a service to check IMAPS on the host (TCP connect to port 993 with -S)
define service{
    use                     generic-service ; Inherit default values from a template
    host_name               {{client.hostname}}
    service_description     {{service}}
    check_command           check_tcp!993!-S
    notifications_enabled   1
    contact_groups          {{client.contactgroup}}
    }

{% elif service == "pop3" %}
# Define a service to check POP3 on the host
define service{
    use                     generic-service ; Inherit default values from a template
    host_name               {{client.hostname}}
    service_description     {{service}}
    check_command           check_pop
    notifications_enabled   1
    contact_groups          {{client.contactgroup}}
    }

{% elif service == "pop3s" %}
# Define a service to check POP3S on the host (TCP connect to port 995 with -S)
define service{
    use                     generic-service ; Inherit default values from a template
    host_name               {{client.hostname}}
    service_description     {{service}}
    check_command           check_tcp!995!-S
    notifications_enabled   1
    contact_groups          {{client.contactgroup}}
    }

{# ---- internal checks, executed on the host itself through check_nrpe ---- #}
{% elif service == "users" %}
# Define a service to check logged-in users on the host via NRPE
define service{
    use                     generic-service ; Inherit default values from a template
    host_name               {{client.hostname}}
    service_description     {{service}}
    check_command           check_nrpe!check_users
    notifications_enabled   1
    contact_groups          {{client.contactgroup}}
    }

{% elif service == "load" %}
# Define a service to check CPU load on the host via NRPE
define service{
    use                     generic-service ; Inherit default values from a template
    host_name               {{client.hostname}}
    service_description     {{service}}
    check_command           check_nrpe!check_load
    notifications_enabled   1
    contact_groups          {{client.contactgroup}}
    }

{% elif service == "processes" %}
# Define a service to check the total number of processes on the host via NRPE
define service{
    use                     generic-service ; Inherit default values from a template
    host_name               {{client.hostname}}
    service_description     {{service}}
    check_command           check_nrpe!check_total_procs
    notifications_enabled   1
    contact_groups          {{client.contactgroup}}
    }

{% elif service == "disk" %}
# Define a service to check disk usage on the host via NRPE
define service{
    use                     generic-service ; Inherit default values from a template
    host_name               {{client.hostname}}
    service_description     {{service}}
    check_command           check_nrpe!check_disk
    notifications_enabled   1
    contact_groups          {{client.contactgroup}}
    }

{% elif service == "swap" %}
# Define a service to check swap usage on the host via NRPE
define service{
    use                     generic-service ; Inherit default values from a template
    host_name               {{client.hostname}}
    service_description     {{service}}
    check_command           check_nrpe!check_swap
    notifications_enabled   1
    contact_groups          {{client.contactgroup}}
    }

{% elif service == "zombie" %}
# Define a service to check for zombie processes on the host via NRPE
define service{
    use                     generic-service ; Inherit default values from a template
    host_name               {{client.hostname}}
    service_description     {{service}}
    check_command           check_nrpe!check_zombie_procs
    notifications_enabled   1
    contact_groups          {{client.contactgroup}}
    }

{% endif %}
{% endfor %}
{% endfor %}
Create templates/contacts.j2 with following contents:
{# templates/contacts.j2

   Renders one Nagios contact per entry of nagios_contacts (name, alias,
   email) and one contactgroup per entry of nagios_contactgroups (name,
   alias, members - a list of contact names).
#}
{% for contact1 in nagios_contacts %}
define contact{
    contact_name        {{contact1.name}}       ; Short name of user
    use                 generic-contact         ; Inherit default values from the generic-contact template
    alias               {{contact1.alias}}      ; Full name of user
    email               {{contact1.email}}
    }

{% endfor %}
{% for contactgroup1 in nagios_contactgroups %}
define contactgroup{
    contactgroup_name   {{contactgroup1.name}}
    alias               {{contactgroup1.alias}}
{# join keeps the whole member list on the directive's own line, comma-separated,
   with no whitespace or line breaks introduced by template control tags. #}
    members             {{contactgroup1.members | join(',')}}
    }

{% endfor %}
Finally, the following variables need to be defined either in the host file implementing the nagios-server role, in common-vars, or in vars/main.yaml of the nagios-server role itself:
# Contacts rendered into contacts.cfg by templates/contacts.j2.
nagios_contacts:
  - { name: nagiosadmin, alias: "Nagios administrator", email: logs@sbarjatiya.com }
  - { name: saurabh, alias: "Saurabh", email: saurabh@sbarjatiya.com }

# Contact groups; every name listed in `members` must be a contact from
# nagios_contacts above.
nagios_contactgroups:
  # Do not remove or rename this group. Change alias or members as necessary.
  - { name: admins, alias: "Logs admin list", members: [ nagiosadmin ] }
  - { name: engineers, alias: "Saurabh and Nagios admin", members: [ nagiosadmin, saurabh ] }

# Monitored hosts. `contactgroup` must be the name of a group from
# nagios_contactgroups (not an individual contact), because it is written into
# the contact_groups directive of each generated service.
nagios_client_list:
  - { hostname: server1.sbarjatiya.com, ip: "10.4.20.201", contactgroup: admins, service: [ "ping", "ssh", "users", "disk", "load", "processes", "zombie", "swap" ] }
  - { hostname: server2.sbarjatiya.com, ip: "10.4.20.171", contactgroup: engineers, service: [ "ping", "ssh", "users", "disk", "load", "processes", "zombie", "swap" ] }
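Other service options supported by templates/client_info.j2 are: http, https, ftp, smtp, imap, imaps, pop3 and pop3s (smtps is not handled by the template shown above)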
<yambe:breadcrumb>Ansible_roles|Ansible roles</yambe:breadcrumb>