[PATCH] fix for ia64 sched-domains code
[deliverable/linux.git] / arch / ia64 / kernel / domain.c
/*
 * arch/ia64/kernel/domain.c
 * Architecture specific sched-domains builder.
 *
 * Copyright (C) 2004 Jesse Barnes
 * Copyright (C) 2004 Silicon Graphics, Inc.
 */

#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/topology.h>
#include <linux/nodemask.h>

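/*
 * Upper bound on the number of nodes a single node-level sched domain
 * will span; larger machines additionally get an "allnodes" domain on
 * top (see build_sched_domains() below).
 */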
#define SD_NODES_PER_DOMAIN 16

#ifdef CONFIG_NUMA
/**
 * find_next_best_node - find the next node to include in a sched_domain
 * @node: node whose sched_domain we're building
 * @used_nodes: nodes already in the sched_domain
 *
 * Find the next node to include in a given scheduling domain. Simply
 * finds the closest node not already in the @used_nodes map.
 *
 * Should use nodemask_t.
 */
static int find_next_best_node(int node, unsigned long *used_nodes)
{
	int i, n, val, min_val, best_node = 0;

	min_val = INT_MAX;

	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Start at @node */
		n = (node + i) % MAX_NUMNODES;

		if (!nr_cpus_node(n))
			continue;

		/* Skip already used nodes */
		if (test_bit(n, used_nodes))
			continue;

		/* Simple min distance search */
		val = node_distance(node, n);

		if (val < min_val) {
			min_val = val;
			best_node = n;
		}
	}

	set_bit(best_node, used_nodes);
	return best_node;
}

/**
 * sched_domain_node_span - get a cpumask for a node's sched_domain
 * @node: node whose cpumask we're constructing
 *
 * Given a node, construct a good cpumask for its sched_domain to span. It
 * should be one that prevents unnecessary balancing, but also spreads tasks
 * out optimally.
 */
static cpumask_t sched_domain_node_span(int node)
{
	int i;
	cpumask_t span, nodemask;
	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);

	cpus_clear(span);
	bitmap_zero(used_nodes, MAX_NUMNODES);

	nodemask = node_to_cpumask(node);
	cpus_or(span, span, nodemask);
	set_bit(node, used_nodes);

	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
		int next_node = find_next_best_node(node, used_nodes);
		nodemask = node_to_cpumask(next_node);
		cpus_or(span, span, nodemask);
	}

	return span;
}
#endif

/*
 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
 * can switch it on easily if needed.
 */
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static struct sched_group sched_group_cpus[NR_CPUS];
static int cpu_to_cpu_group(int cpu)
{
	return cpu;
}
#endif

static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static struct sched_group sched_group_phys[NR_CPUS];
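
/*
 * Map a cpu to its physical-package group: with CONFIG_SCHED_SMT all
 * siblings of a package map to the first cpu in their sibling map,
 * otherwise every cpu is its own group.
 */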
static int cpu_to_phys_group(int cpu)
{
#ifdef CONFIG_SCHED_SMT
	return first_cpu(cpu_sibling_map[cpu]);
#else
	return cpu;
#endif
}

#ifdef CONFIG_NUMA
/*
 * The init_sched_build_groups can't handle what we want to do with node
 * groups, so roll our own. Now each node has its own list of groups which
 * gets dynamically allocated.
 */
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static struct sched_group *sched_group_nodes[MAX_NUMNODES];

static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
static struct sched_group sched_group_allnodes[MAX_NUMNODES];

static int cpu_to_allnodes_group(int cpu)
{
	return cpu_to_node(cpu);
}
#endif

/*
 * Build sched domains for a given set of cpus and attach the sched domains
 * to the individual cpus.
 */
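/*
 * Each cpu ends up with the following domain hierarchy, innermost first:
 *
 *	cpu_domains	 - its SMT siblings (CONFIG_SCHED_SMT only)
 *	phys_domains	 - the cpus of its node
 *	node_domains	 - the cpus of nearby nodes (CONFIG_NUMA only)
 *	allnodes_domains - every cpu in cpu_map, present only when the
 *			   machine spans more than SD_NODES_PER_DOMAIN
 *			   nodes' worth of cpus
 */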
void build_sched_domains(const cpumask_t *cpu_map)
{
	int i;

	/*
	 * Set up domains for cpus specified by the cpu_map.
	 */
	for_each_cpu_mask(i, *cpu_map) {
		int group;
		struct sched_domain *sd = NULL, *p;
		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));

		cpus_and(nodemask, nodemask, *cpu_map);

#ifdef CONFIG_NUMA
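		/*
		 * Machines with more cpus than SD_NODES_PER_DOMAIN nodes'
		 * worth get an extra top-level "allnodes" domain spanning
		 * the whole cpu_map; the node-level domain then becomes
		 * its child.
		 */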
		if (num_online_cpus()
				> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
			sd = &per_cpu(allnodes_domains, i);
			*sd = SD_ALLNODES_INIT;
			sd->span = *cpu_map;
			group = cpu_to_allnodes_group(i);
			sd->groups = &sched_group_allnodes[group];
			p = sd;
		} else
			p = NULL;

		sd = &per_cpu(node_domains, i);
		*sd = SD_NODE_INIT;
		sd->span = sched_domain_node_span(cpu_to_node(i));
		sd->parent = p;
		cpus_and(sd->span, sd->span, *cpu_map);
#endif

		p = sd;
		sd = &per_cpu(phys_domains, i);
		group = cpu_to_phys_group(i);
		*sd = SD_CPU_INIT;
		sd->span = nodemask;
		sd->parent = p;
		sd->groups = &sched_group_phys[group];

#ifdef CONFIG_SCHED_SMT
		p = sd;
		sd = &per_cpu(cpu_domains, i);
		group = cpu_to_cpu_group(i);
		*sd = SD_SIBLING_INIT;
		sd->span = cpu_sibling_map[i];
		cpus_and(sd->span, sd->span, *cpu_map);
		sd->parent = p;
		sd->groups = &sched_group_cpus[group];
#endif
	}

#ifdef CONFIG_SCHED_SMT
	/* Set up CPU (sibling) groups */
	for_each_cpu_mask(i, *cpu_map) {
		cpumask_t this_sibling_map = cpu_sibling_map[i];
		cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
		if (i != first_cpu(this_sibling_map))
			continue;

		init_sched_build_groups(sched_group_cpus, this_sibling_map,
						&cpu_to_cpu_group);
	}
#endif

	/* Set up physical groups */
	for (i = 0; i < MAX_NUMNODES; i++) {
		cpumask_t nodemask = node_to_cpumask(i);

		cpus_and(nodemask, nodemask, *cpu_map);
		if (cpus_empty(nodemask))
			continue;

		init_sched_build_groups(sched_group_phys, nodemask,
						&cpu_to_phys_group);
	}

#ifdef CONFIG_NUMA
	init_sched_build_groups(sched_group_allnodes, *cpu_map,
				&cpu_to_allnodes_group);

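	/*
	 * For each node, build a circular list of groups headed by
	 * sched_group_nodes[i]: the first group covers the node's own cpus,
	 * then one group is added per other node in this node's domain
	 * span, walking the nodes in index order until the span is covered.
	 */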
	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Set up node groups */
		struct sched_group *sg, *prev;
		cpumask_t nodemask = node_to_cpumask(i);
		cpumask_t domainspan;
		cpumask_t covered = CPU_MASK_NONE;
		int j;

		cpus_and(nodemask, nodemask, *cpu_map);
		if (cpus_empty(nodemask))
			continue;

		domainspan = sched_domain_node_span(i);
		cpus_and(domainspan, domainspan, *cpu_map);

		sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
		sched_group_nodes[i] = sg;
		for_each_cpu_mask(j, nodemask) {
			struct sched_domain *sd;
			sd = &per_cpu(node_domains, j);
			sd->groups = sg;
			if (sd->groups == NULL) {
				/* Turn off balancing if we have no groups */
				sd->flags = 0;
			}
		}
		if (!sg) {
			printk(KERN_WARNING
			"Can not alloc domain group for node %d\n", i);
			continue;
		}
		sg->cpu_power = 0;
		sg->cpumask = nodemask;
		cpus_or(covered, covered, nodemask);
		prev = sg;

		for (j = 0; j < MAX_NUMNODES; j++) {
			cpumask_t tmp, notcovered;
			int n = (i + j) % MAX_NUMNODES;

			cpus_complement(notcovered, covered);
			cpus_and(tmp, notcovered, *cpu_map);
			cpus_and(tmp, tmp, domainspan);
			if (cpus_empty(tmp))
				break;

			nodemask = node_to_cpumask(n);
			cpus_and(tmp, tmp, nodemask);
			if (cpus_empty(tmp))
				continue;

			sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
			if (!sg) {
				printk(KERN_WARNING
				"Can not alloc domain group for node %d\n", j);
				break;
			}
			sg->cpu_power = 0;
			sg->cpumask = tmp;
			cpus_or(covered, covered, tmp);
			prev->next = sg;
			prev = sg;
		}
		prev->next = sched_group_nodes[i];
	}
#endif

	/* Calculate CPU power for physical packages and nodes */
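	/*
	 * A group's power starts at SCHED_LOAD_SCALE for one cpu and grows
	 * by SCHED_LOAD_SCALE/10 for every additional cpu in the group.
	 */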
	for_each_cpu_mask(i, *cpu_map) {
		int power;
		struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
		power = SCHED_LOAD_SCALE;
		sd->groups->cpu_power = power;
#endif

		sd = &per_cpu(phys_domains, i);
		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
			(cpus_weight(sd->groups->cpumask)-1) / 10;
		sd->groups->cpu_power = power;

#ifdef CONFIG_NUMA
		sd = &per_cpu(allnodes_domains, i);
		if (sd->groups) {
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;
			sd->groups->cpu_power = power;
		}
#endif
	}

#ifdef CONFIG_NUMA
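	/*
	 * A node-level group's power is the sum of the power of each
	 * physical package it contains, with every package counted once.
	 */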
	for (i = 0; i < MAX_NUMNODES; i++) {
		struct sched_group *sg = sched_group_nodes[i];
		int j;

		if (sg == NULL)
			continue;
next_sg:
		for_each_cpu_mask(j, sg->cpumask) {
			struct sched_domain *sd;
			int power;

			sd = &per_cpu(phys_domains, j);
			if (j != first_cpu(sd->groups->cpumask)) {
				/*
				 * Only add "power" once for each
				 * physical package.
				 */
				continue;
			}
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;

			sg->cpu_power += power;
		}
		sg = sg->next;
		if (sg != sched_group_nodes[i])
			goto next_sg;
	}
#endif

	/* Attach the domains */
	for_each_cpu_mask(i, *cpu_map) {
		struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
#else
		sd = &per_cpu(phys_domains, i);
#endif
		cpu_attach_domain(sd, i);
	}
}

/*
 * Set up scheduler domains and groups. Callers must hold the hotplug lock.
 */
void arch_init_sched_domains(const cpumask_t *cpu_map)
{
	cpumask_t cpu_default_map;

	/*
	 * Set up a mask of cpus without special-case scheduling requirements.
	 * For now this just excludes isolated cpus, but it could be used to
	 * exclude other special cases in the future.
	 */
	cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);

	build_sched_domains(&cpu_default_map);
}
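
/*
 * Illustrative only (not part of this file): a caller rebuilding the
 * domains for the currently online cpus might do, under the hotplug lock:
 *
 *	lock_cpu_hotplug();
 *	arch_destroy_sched_domains(&cpu_online_map);
 *	arch_init_sched_domains(&cpu_online_map);
 *	unlock_cpu_hotplug();
 */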

void arch_destroy_sched_domains(const cpumask_t *cpu_map)
{
#ifdef CONFIG_NUMA
	int i;
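	/*
	 * Walk each node's circular group list and free every entry,
	 * stopping once we are back at the list head.
	 */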
	for (i = 0; i < MAX_NUMNODES; i++) {
		cpumask_t nodemask = node_to_cpumask(i);
		struct sched_group *oldsg, *sg = sched_group_nodes[i];

		cpus_and(nodemask, nodemask, *cpu_map);
		if (cpus_empty(nodemask))
			continue;

		if (sg == NULL)
			continue;
		sg = sg->next;
next_sg:
		oldsg = sg;
		sg = sg->next;
		kfree(oldsg);
		if (oldsg != sched_group_nodes[i])
			goto next_sg;
		sched_group_nodes[i] = NULL;
	}
#endif
}