Browse Source

Remove dynamic include in logging_fluentd role

This commit removes the dynamic include in favor of
import_tasks to avoid a potential memory ballooning issue.

This commit creates a temporary script for labeling
nodes and adds a sleep command to the script to allow
for delay between labeling nodes so the scheduler
does not get overwhelmed.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1557290
Michael Gugino 7 years ago
parent
commit
6e15eb00f2

+ 20 - 8
roles/openshift_logging_fluentd/tasks/label_and_wait.yaml

@@ -1,10 +1,22 @@
 ---
-- name: Label {{ node }} for Fluentd deployment
-  oc_label:
-    name: "{{ node }}"
-    kind: node
-    state: add
-    labels: "{{ openshift_logging_fluentd_nodeselector | lib_utils_oo_dict_to_list_of_dict }}"
+# This script is a special case because we need to pause between nodes while
+# labeling to avoid overloading the scheduler.
+# Also, looping over this task file (label_and_wait) causes memory to balloon
+# in some instances due to dynamic include bug in ansible, so we can't add the
+# wait in ansible directly.
+- name: Create temporary fluentd labeling script
+  template:
+    src: fluentd_label.sh.j2
+    dest: /tmp/fluentd_label.temp.sh
+    mode: "0744"
 
-# wait half a second between labels
-- local_action: command sleep {{ openshift_logging_fluentd_label_delay | default('.5') }}
+- name: Execute the fluentd temporary labeling script
+  command: "/tmp/fluentd_label.temp.sh {{ fluentd_host }}"
+  with_items: "{{ openshift_logging_fluentd_hosts }}"
+  loop_control:
+    loop_var: fluentd_host
+
+- name: Remove temporary fluentd labeling script
+  file:
+    path: /tmp/fluentd_label.temp.sh
+    state: absent

+ 1 - 6
roles/openshift_logging_fluentd/tasks/main.yaml

@@ -212,12 +212,7 @@
     openshift_logging_fluentd_hosts: "{{ fluentd_hosts.results.results[0]['items'] | map(attribute='metadata.name') | list }}"
   when: "'--all' in openshift_logging_fluentd_hosts"
 
-- include_tasks: label_and_wait.yaml
-  vars:
-    node: "{{ fluentd_host }}"
-  with_items: "{{ openshift_logging_fluentd_hosts }}"
-  loop_control:
-    loop_var: fluentd_host
+- import_tasks: label_and_wait.yaml
 
 - name: Delete temp directory
   file:

+ 8 - 0
roles/openshift_logging_fluentd/templates/fluentd_label.sh.j2

@@ -0,0 +1,8 @@
+#!/bin/bash
+
+{{ openshift_client_binary }} --config=/etc/origin/master/admin.kubeconfig \
+  label --overwrite node $1 \
+  {% for k, v in openshift_logging_fluentd_nodeselector.items() %} {{ k }}={{ v }} {% endfor %}
+
+# We sleep here because scheduler will get overwhelmed if we label all nodes at once.
+sleep {{ openshift_logging_fluentd_label_delay }}