1/* $NetBSD: balloon.c,v 1.17 2016/07/07 06:55:40 msaitoh Exp $ */
2
3/*-
4 * Copyright (c) 2010 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Cherry G. Mathew <cherry@zyx.in> and
9 * Jean-Yves Migeon <jym@NetBSD.org>
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * The Xen balloon driver enables growing and shrinking PV domains
35 * memory on the fly, by allocating and freeing memory pages directly.
36 * This management needs domain cooperation to work properly, especially
37 * during balloon_inflate() operation where a domain gives back memory to
38 * the hypervisor.
39 *
40 * Shrinking memory on a live system is a difficult task, and may render
41 * it unstable or lead to crash. The driver takes a conservative approach
42 * there by doing memory operations in small steps of a few MiB each time. It
43 * will also refuse to decrease reservation below a certain threshold
44 * (XEN_RESERVATION_MIN), so as to avoid a complete kernel memory exhaustion.
45 *
46 * The user can intervene at two different levels to manage the ballooning
47 * of a domain:
48 * - directly within the domain using a sysctl(9) interface.
49 * - through the Xentools, by modifying the memory/target entry associated
50 * to a domain. This is usually done in dom0.
51 *
52 * Modification of the reservation is signaled by writing inside the
53 * memory/target node in Xenstore. Writing new values will fire the xenbus
54 * watcher, and wakeup the balloon thread to inflate or deflate balloon.
55 *
56 * Both sysctl(9) nodes and memory/target entry assume that the values passed
57 * to them are in KiB. Internally, the driver will convert this value in
58 * pages (assuming a page is PAGE_SIZE bytes), and issue the correct hypercalls
59 * to decrease/increase domain's reservation accordingly.
60 *
61 * XXX Pages used by balloon are tracked through entries stored in a SLIST.
62 * This allows driver to conveniently add/remove wired pages from memory
63 * without the need to support these "memory gaps" inside uvm(9). Still, the
64 * driver does not currently "plug" new pages into uvm(9) when more memory
65 * is available than originally managed by balloon. For example, deflating
66 * balloon with a total number of pages above physmem is not supported for
67 * now. See balloon_deflate() for more details.
68 *
69 */
70
/*
 * Debug knob for this driver. Code guarded by "#if BALLOONDEBUG" emits
 * additional messages. Defaults to off; may be overridden at build time
 * (e.g. -DBALLOONDEBUG=1) without editing this file.
 */
#ifndef BALLOONDEBUG
#define BALLOONDEBUG 0
#endif
72
73#include <sys/cdefs.h>
74__KERNEL_RCSID(0, "$NetBSD: balloon.c,v 1.17 2016/07/07 06:55:40 msaitoh Exp $");
75
76#include <sys/inttypes.h>
77#include <sys/device.h>
78#include <sys/param.h>
79
80#include <sys/atomic.h>
81#include <sys/condvar.h>
82#include <sys/kernel.h>
83#include <sys/kmem.h>
84#include <sys/kthread.h>
85#include <sys/mutex.h>
86#include <sys/pool.h>
87#include <sys/queue.h>
88#include <sys/sysctl.h>
89
90#include <xen/xen.h>
91#include <xen/xenbus.h>
92#include <xen/balloon.h>
93
94#include <uvm/uvm.h>
95#include <uvm/uvm.h>
96#include <xen/xenpmap.h>
97
98#include "locators.h"
99
/*
 * Upper bound on the number of MFNs exchanged with the hypervisor in one
 * balloon_inflate() / balloon_deflate() step. It is sized so that the
 * array of xen_pfn_t entries occupies exactly one page.
 */
#define	BALLOON_DELTA		(PAGE_SIZE / sizeof(xen_pfn_t))

/*
 * Safety margin. The domain refuses to shrink below uvmexp.freemin plus
 * this ballast so that it keeps some free pages for itself. The ballast
 * value is arbitrary and may be tuned over time.
 */
#define	BALLOON_BALLAST		256	/* pages (1MiB with 4KiB pages) */
#define	XEN_RESERVATION_MIN	(uvmexp.freemin + BALLOON_BALLAST) /* pages */
114
/*
 * KiB <-> page conversions.
 *
 * PAGE_SIZE_KB          size of one page, in KiB
 * BALLOON_PAGES_TO_KB   page count -> KiB, computed as a uint64_t
 * BALLOON_KB_TO_PAGES   KiB -> page count, rounded up to a whole page
 *
 * Arguments are parenthesized so that compound expressions such as
 * BALLOON_PAGES_TO_KB(a + b) convert the whole expression.
 */
#define	PAGE_SIZE_KB		(PAGE_SIZE >> 10)	/* page size in KiB */
#define	BALLOON_PAGES_TO_KB(_pg) ((uint64_t)(_pg) * PAGE_SIZE_KB)
#define	BALLOON_KB_TO_PAGES(_kb) (roundup((_kb), PAGE_SIZE_KB) / PAGE_SIZE_KB)
119
/*
 * Bookkeeping record for a single page that is (or is about to be) held by
 * the balloon. Records are chained on an SLIST so that the pages can be
 * found again when the balloon is deflated.
 */
struct balloon_page_entry {
	struct vm_page			*pg;	/* the tracked page */
	SLIST_ENTRY(balloon_page_entry)	 entry;	/* SLIST linkage */
};
127
128struct balloon_xenbus_softc {
129 device_t sc_dev;
130 struct sysctllog *sc_log;
131
132 kmutex_t balloon_mtx; /* Protects condvar, target and res_min (below) */
133 kcondvar_t balloon_cv; /* Condvar variable for target (below) */
134 size_t balloon_target; /* Target domain reservation size in pages. */
135 /* Minimum amount of memory reserved by domain, in KiB */
136 uint64_t balloon_res_min;
137
138 xen_pfn_t *sc_mfn_list; /* List of MFNs passed from/to balloon */
139 pool_cache_t bpge_pool; /* pool cache for balloon page entries */
140 /* linked list for tracking pages used by balloon */
141 SLIST_HEAD(, balloon_page_entry) balloon_page_entries;
142 size_t balloon_num_page_entries;
143};
144
145static size_t xenmem_get_currentreservation(void);
146static size_t xenmem_get_maxreservation(void);
147
148static int bpge_ctor(void *, void *, int);
149static void bpge_dtor(void *, void *);
150
151static void balloon_thread(void *);
152static size_t balloon_deflate(struct balloon_xenbus_softc*, size_t);
153static size_t balloon_inflate(struct balloon_xenbus_softc*, size_t);
154
155static void sysctl_kern_xen_balloon_setup(struct balloon_xenbus_softc *);
156static void balloon_xenbus_watcher(struct xenbus_watch *, const char **,
157 unsigned int);
158
159static int balloon_xenbus_match(device_t, cfdata_t, void *);
160static void balloon_xenbus_attach(device_t, device_t, void *);
161
162CFATTACH_DECL_NEW(balloon, sizeof(struct balloon_xenbus_softc),
163 balloon_xenbus_match, balloon_xenbus_attach, NULL, NULL);
164
165static struct xenbus_watch balloon_xenbus_watch = {
166 .node = __UNCONST("memory/target"),
167 .xbw_callback = balloon_xenbus_watcher,
168};
169
170static struct balloon_xenbus_softc *balloon_sc;
171
172static int
173balloon_xenbus_match(device_t parent, cfdata_t match, void *aux)
174{
175 struct xenbusdev_attach_args *xa = aux;
176
177 if (strcmp(xa->xa_type, "balloon") != 0)
178 return 0;
179
180 if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
181 match->cf_loc[XENBUSCF_ID] != xa->xa_id)
182 return 0;
183
184 return 1;
185}
186
187static void
188balloon_xenbus_attach(device_t parent, device_t self, void *aux)
189{
190 xen_pfn_t *mfn_list;
191 size_t currentpages;
192 struct balloon_xenbus_softc *sc = balloon_sc = device_private(self);
193
194 aprint_normal(": Xen Balloon driver\n");
195 sc->sc_dev = self;
196
197 /* Initialize target mutex and condvar */
198 mutex_init(&sc->balloon_mtx, MUTEX_DEFAULT, IPL_NONE);
199 cv_init(&sc->balloon_cv, "xen_balloon");
200
201 SLIST_INIT(&sc->balloon_page_entries);
202 sc->balloon_num_page_entries = 0;
203
204 /* Get current number of pages */
205 currentpages = xenmem_get_currentreservation();
206
207 KASSERT(currentpages > 0);
208
209 /* Update initial target value - no need to lock for initialization */
210 sc->balloon_target = currentpages;
211
212 /* Set the values used by sysctl */
213 sc->balloon_res_min =
214 BALLOON_PAGES_TO_KB(XEN_RESERVATION_MIN);
215
216 aprint_normal_dev(self, "current reservation: %"PRIu64" KiB\n",
217 BALLOON_PAGES_TO_KB(currentpages));
218#if BALLOONDEBUG
219 aprint_normal_dev(self, "min reservation: %"PRIu64" KiB\n",
220 sc->balloon_res_min);
221 aprint_normal_dev(self, "max reservation: %"PRIu64" KiB\n",
222 BALLOON_PAGES_TO_KB(xenmem_get_maxreservation()));
223#endif
224
225 sc->bpge_pool = pool_cache_init(sizeof(struct balloon_page_entry),
226 0, 0, 0, "xen_bpge", NULL, IPL_NONE, bpge_ctor, bpge_dtor, NULL);
227
228 sysctl_kern_xen_balloon_setup(sc);
229
230 /* List of MFNs passed from/to balloon for inflating/deflating */
231 mfn_list = kmem_alloc(BALLOON_DELTA * sizeof(*mfn_list), KM_SLEEP);
232 sc->sc_mfn_list = mfn_list;
233
234 /* Setup xenbus node watch callback */
235 if (register_xenbus_watch(&balloon_xenbus_watch)) {
236 aprint_error_dev(self, "unable to watch memory/target\n");
237 goto error;
238 }
239
240 /* Setup kernel thread to asynchronously (in/de)-flate the balloon */
241 if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, balloon_thread,
242 sc, NULL, "xen_balloon")) {
243 aprint_error_dev(self, "unable to create balloon thread\n");
244 unregister_xenbus_watch(&balloon_xenbus_watch);
245 goto error;
246 }
247
248 if (!pmf_device_register(self, NULL, NULL))
249 aprint_error_dev(self, "couldn't establish power handler\n");
250
251 return;
252
253error:
254 sysctl_teardown(&sc->sc_log);
255 cv_destroy(&sc->balloon_cv);
256 mutex_destroy(&sc->balloon_mtx);
257 return;
258
259}
260
261/*
262 * Returns maximum memory reservation available to current domain. In Xen
263 * with DOMID_SELF, this hypercall never fails: return value should be
264 * interpreted as unsigned.
265 *
266 */
267static size_t
268xenmem_get_maxreservation(void)
269{
270 int s;
271 unsigned int ret;
272
273 s = splvm();
274 ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation,
275 & (domid_t) { DOMID_SELF });
276
277 splx(s);
278
279 if (ret == 0) {
280 /* well, a maximum reservation of 0 is really bogus */
281 panic("%s failed, maximum reservation returned 0", __func__);
282 }
283
284 return ret;
285}
286
287/* Returns current reservation, in pages */
288static size_t
289xenmem_get_currentreservation(void)
290{
291 int s, ret;
292
293 s = splvm();
294 ret = HYPERVISOR_memory_op(XENMEM_current_reservation,
295 & (domid_t) { DOMID_SELF });
296 splx(s);
297
298 if (ret < 0) {
299 panic("%s failed: %d", __func__, ret);
300 }
301
302 return ret;
303}
304
305/*
306 * Get value (in KiB) of memory/target in XenStore for current domain
307 * A return value of 0 can be considered as bogus or absent.
308 */
309static unsigned long long
310balloon_xenbus_read_target(void)
311{
312 unsigned long long new_target;
313 int err = xenbus_read_ull(NULL, "memory", "target", &new_target, 0);
314
315 switch(err) {
316 case 0:
317 return new_target;
318 case ENOENT:
319 break;
320 default:
321 device_printf(balloon_sc->sc_dev,
322 "error %d, couldn't read xenbus target node\n", err);
323 break;
324 }
325
326 return 0;
327}
328
329/* Set memory/target value (in KiB) in XenStore for current domain */
330static void
331balloon_xenbus_write_target(unsigned long long new_target)
332{
333 int err = xenbus_printf(NULL, "memory", "target", "%llu", new_target);
334
335 if (err != 0) {
336 device_printf(balloon_sc->sc_dev,
337 "error %d, couldn't write xenbus target node\n", err);
338 }
339
340 return;
341}
342
343static int
344bpge_ctor(void *arg, void *obj, int flags)
345{
346 struct balloon_page_entry *bpge = obj;
347
348 bpge->pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
349 if (bpge->pg == NULL)
350 return ENOMEM;
351
352 return 0;
353
354}
355
356static void
357bpge_dtor(void *arg, void *obj)
358{
359 struct balloon_page_entry *bpge = obj;
360
361 uvm_pagefree(bpge->pg);
362}
363
364/*
365 * Inflate balloon. Pages are moved out of domain's memory towards balloon.
366 */
367static size_t
368balloon_inflate(struct balloon_xenbus_softc *sc, size_t tpages)
369{
370 int rpages, s, ret;
371 paddr_t pa;
372 struct balloon_page_entry *bpg_entry;
373 xen_pfn_t *mfn_list = sc->sc_mfn_list;
374
375 struct xen_memory_reservation reservation = {
376 .address_bits = 0,
377 .extent_order = 0,
378 .domid = DOMID_SELF
379 };
380
381 KASSERT(tpages > 0);
382 KASSERT(tpages <= BALLOON_DELTA);
383
384 memset(mfn_list, 0, BALLOON_DELTA * sizeof(*mfn_list));
385
386 /* allocate pages that will be given to Hypervisor */
387 for (rpages = 0; rpages < tpages; rpages++) {
388
389 bpg_entry = pool_cache_get(sc->bpge_pool, PR_WAITOK);
390 if (bpg_entry == NULL) {
391 /* failed reserving a page for balloon */
392 break;
393 }
394
395 pa = VM_PAGE_TO_PHYS(bpg_entry->pg);
396
397 mfn_list[rpages] = xpmap_ptom(pa) >> PAGE_SHIFT;
398
399 s = splvm();
400 /* Invalidate pg */
401 xpmap_ptom_unmap(pa);
402 splx(s);
403
404 SLIST_INSERT_HEAD(&balloon_sc->balloon_page_entries,
405 bpg_entry, entry);
406 balloon_sc->balloon_num_page_entries++;
407 }
408
409 /* Hand over pages to Hypervisor */
410 set_xen_guest_handle(reservation.extent_start, mfn_list);
411 reservation.nr_extents = rpages;
412
413 s = splvm();
414 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
415 &reservation);
416 splx(s);
417
418 if (ret != rpages) {
419 /*
420 * we are in bad shape: the operation failed for certain
421 * MFNs. As the API does not allow us to know which frame
422 * numbers were erroneous, we cannot really recover safely.
423 */
424 panic("%s: decrease reservation failed: was %d, "
425 "returned %d", device_xname(sc->sc_dev), rpages, ret);
426 }
427
428#if BALLOONDEBUG
429 device_printf(sc->sc_dev, "inflate %zu => inflated by %d\n",
430 tpages, rpages);
431#endif
432 return rpages;
433}
434
435/*
436 * Deflate balloon. Pages are given back to domain's memory.
437 */
438static size_t
439balloon_deflate(struct balloon_xenbus_softc *sc, size_t tpages)
440{
441 int rpages, s, ret;
442 paddr_t pa;
443 struct balloon_page_entry *bpg_entry;
444 xen_pfn_t *mfn_list = sc->sc_mfn_list;
445
446 struct xen_memory_reservation reservation = {
447 .address_bits = 0,
448 .extent_order = 0,
449 .domid = DOMID_SELF
450 };
451
452 KASSERT(tpages > 0);
453 KASSERT(tpages <= BALLOON_DELTA);
454
455 memset(mfn_list, 0, BALLOON_DELTA * sizeof(*mfn_list));
456
457 /*
458 * If the list is empty, we are deflating balloon beyond empty. This
459 * is currently unsupported as this would require to dynamically add
460 * new memory pages inside uvm(9) and instruct pmap(9) on how to
461 * handle them. For now, we clip reservation up to the point we
462 * can manage them, eg. the remaining bpg entries in the SLIST.
463 * XXX find a way to hotplug memory through uvm(9)/pmap(9).
464 */
465 if (tpages > sc->balloon_num_page_entries) {
466 device_printf(sc->sc_dev,
467 "memory 'hot-plug' unsupported - clipping "
468 "reservation %zu => %zu pages.\n",
469 tpages, sc->balloon_num_page_entries);
470 tpages = sc->balloon_num_page_entries;
471 }
472
473 /* reclaim pages from balloon */
474 set_xen_guest_handle(reservation.extent_start, mfn_list);
475 reservation.nr_extents = tpages;
476
477 s = splvm();
478 ret = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
479 splx(s);
480
481 if (ret < 0) {
482 panic("%s: increase reservation failed, ret %d",
483 device_xname(sc->sc_dev), ret);
484 }
485
486 if (ret != tpages) {
487 device_printf(sc->sc_dev,
488 "increase reservation incomplete: was %zu, "
489 "returned %d\n", tpages, ret);
490 }
491
492 /* plug pages back into memory through bpge entries */
493 for (rpages = 0; rpages < ret; rpages++) {
494
495#ifdef noyet
496 if (sc->balloon_num_page_entries == 0) {
497 /*
498 * XXX This is the case where extra "hot-plug"
499 * mem w.r.t boot comes in
500 */
501 device_printf(sc->sc_dev,
502 "List empty. Cannot be collapsed further!\n");
503 break;
504 }
505#endif
506
507 bpg_entry = SLIST_FIRST(&balloon_sc->balloon_page_entries);
508 SLIST_REMOVE_HEAD(&balloon_sc->balloon_page_entries, entry);
509 balloon_sc->balloon_num_page_entries--;
510
511 /* Update P->M */
512 pa = VM_PAGE_TO_PHYS(bpg_entry->pg);
513
514 s = splvm();
515
516 xpmap_ptom_map(pa, ptoa(mfn_list[rpages]));
517 xpq_queue_machphys_update(ptoa(mfn_list[rpages]), pa);
518
519 splx(s);
520
521 pool_cache_put(sc->bpge_pool, bpg_entry);
522 }
523
524 xpq_flush_queue();
525
526#if BALLOONDEBUG
527 device_printf(sc->sc_dev, "deflate %zu => deflated by %d\n",
528 tpages, rpages);
529#endif
530 return rpages;
531}
532
533/*
534 * The balloon thread is responsible for handling inflate/deflate balloon
535 * requests for the current domain given the new "target" value.
536 */
537static void
538balloon_thread(void *cookie)
539{
540 int ret;
541 size_t current, diff, target;
542 struct balloon_xenbus_softc *sc = cookie;
543
544 for/*ever*/ (;;) {
545 current = xenmem_get_currentreservation();
546
547 /*
548 * We assume that balloon_xenbus_watcher() and
549 * sysctl(9) handlers checked the sanity of the
550 * new target value.
551 */
552 mutex_enter(&sc->balloon_mtx);
553 target = sc->balloon_target;
554 if (current != target) {
555 /*
556 * There is work to do. Inflate/deflate in
557 * increments of BALLOON_DELTA pages at maximum. The
558 * risk of integer wrapping is mitigated by
559 * BALLOON_DELTA, which is the upper bound.
560 */
561 mutex_exit(&sc->balloon_mtx);
562 diff = MIN(target - current, BALLOON_DELTA);
563 if (current < target)
564 ret = balloon_deflate(sc, diff);
565 else
566 ret = balloon_inflate(sc, diff);
567
568 if (ret != diff) {
569 /*
570 * Something went wrong during operation.
571 * Log error then feedback current value in
572 * target so that thread gets back to waiting
573 * for the next iteration
574 */
575 device_printf(sc->sc_dev,
576 "WARNING: balloon could not reach target "
577 "%zu (current %zu)\n",
578 target, current);
579 current = xenmem_get_currentreservation();
580 mutex_enter(&sc->balloon_mtx);
581 sc->balloon_target = current;
582 mutex_exit(&sc->balloon_mtx);
583 }
584 } else {
585 /* no need for change -- wait for a signal */
586 cv_wait(&sc->balloon_cv, &sc->balloon_mtx);
587 mutex_exit(&sc->balloon_mtx);
588 }
589 }
590}
591
592/*
593 * Handler called when memory/target value changes inside Xenstore.
594 * All sanity checks must also happen in this handler, as it is the common
595 * entry point where controller domain schedules balloon operations.
596 */
597static void
598balloon_xenbus_watcher(struct xenbus_watch *watch, const char **vec,
599 unsigned int len)
600{
601 size_t new_target;
602 uint64_t target_kb, target_max, target_min;
603
604 target_kb = balloon_xenbus_read_target();
605 if (target_kb == 0) {
606 /* bogus -- just return */
607 return;
608 }
609
610 mutex_enter(&balloon_sc->balloon_mtx);
611 target_min = balloon_sc->balloon_res_min;
612 mutex_exit(&balloon_sc->balloon_mtx);
613 if (target_kb < target_min) {
614 device_printf(balloon_sc->sc_dev,
615 "new target %"PRIu64" is below min %"PRIu64"\n",
616 target_kb, target_min);
617 return;
618 }
619
620 target_max = BALLOON_PAGES_TO_KB(xenmem_get_maxreservation());
621 if (target_kb > target_max) {
622 /*
623 * Should not happen. Hypervisor should block balloon
624 * requests above mem-max.
625 */
626 device_printf(balloon_sc->sc_dev,
627 "new target %"PRIu64" is above max %"PRIu64"\n",
628 target_kb, target_max);
629 return;
630 }
631
632 new_target = BALLOON_KB_TO_PAGES(target_kb);
633
634 device_printf(balloon_sc->sc_dev,
635 "current reservation: %zu pages => target: %zu pages\n",
636 xenmem_get_currentreservation(), new_target);
637
638 /* Only update target if its value changes */
639 mutex_enter(&balloon_sc->balloon_mtx);
640 if (balloon_sc->balloon_target != new_target) {
641 balloon_sc->balloon_target = new_target;
642 cv_signal(&balloon_sc->balloon_cv);
643 }
644 mutex_exit(&balloon_sc->balloon_mtx);
645
646 return;
647}
648
649/*
650 * sysctl(9) stuff
651 */
652
653/* routine to control the minimum memory reserved for the domain */
654static int
655sysctl_kern_xen_balloon_min(SYSCTLFN_ARGS)
656{
657 struct sysctlnode node;
658 u_quad_t newval;
659 int error;
660
661 node = *rnode;
662 node.sysctl_data = &newval;
663
664 mutex_enter(&balloon_sc->balloon_mtx);
665 newval = balloon_sc->balloon_res_min;
666 mutex_exit(&balloon_sc->balloon_mtx);
667
668 error = sysctl_lookup(SYSCTLFN_CALL(&node));
669 if (error || newp == NULL)
670 return error;
671
672 /* Safeguard value: refuse to go below. */
673 if (newval < XEN_RESERVATION_MIN) {
674 device_printf(balloon_sc->sc_dev,
675 "cannot set min below minimum safe value (%d)\n",
676 XEN_RESERVATION_MIN);
677 return EPERM;
678 }
679
680 mutex_enter(&balloon_sc->balloon_mtx);
681 if (balloon_sc->balloon_res_min != newval)
682 balloon_sc->balloon_res_min = newval;
683 mutex_exit(&balloon_sc->balloon_mtx);
684
685 return 0;
686}
687
688/* Returns the maximum memory reservation of the domain */
689static int
690sysctl_kern_xen_balloon_max(SYSCTLFN_ARGS)
691{
692 struct sysctlnode node;
693 u_quad_t node_val;
694
695 node = *rnode;
696
697 node_val = BALLOON_PAGES_TO_KB(xenmem_get_maxreservation());
698 node.sysctl_data = &node_val;
699 return sysctl_lookup(SYSCTLFN_CALL(&node));
700}
701
702/* Returns the current memory reservation of the domain */
703static int
704sysctl_kern_xen_balloon_current(SYSCTLFN_ARGS)
705{
706 struct sysctlnode node;
707 u_quad_t node_val;
708
709 node = *rnode;
710
711 node_val = BALLOON_PAGES_TO_KB(xenmem_get_currentreservation());
712 node.sysctl_data = &node_val;
713 return sysctl_lookup(SYSCTLFN_CALL(&node));
714}
715
716/*
717 * Returns the target memory reservation of the domain
718 * When reading, this sysctl will return the value of the balloon_target
719 * variable, converted into KiB
720 * When used for writing, it will update the new memory/target value
721 * in XenStore, but will not update the balloon_target variable directly.
722 * This will be done by the Xenbus watch handler, balloon_xenbus_watcher().
723 */
724static int
725sysctl_kern_xen_balloon_target(SYSCTLFN_ARGS)
726{
727 struct sysctlnode node;
728 u_quad_t newval, res_min, res_max;
729 int error;
730
731 node = *rnode;
732 node.sysctl_data = &newval;
733
734 mutex_enter(&balloon_sc->balloon_mtx);
735 newval = BALLOON_PAGES_TO_KB(balloon_sc->balloon_target);
736 res_min = balloon_sc->balloon_res_min;
737 mutex_exit(&balloon_sc->balloon_mtx);
738
739 error = sysctl_lookup(SYSCTLFN_CALL(&node));
740 if (newp == NULL || error != 0) {
741 return error;
742 }
743
744 /*
745 * Sanity check new size
746 * We should not balloon below the minimum reservation
747 * set by the domain, nor above the maximum reservation set
748 * by domain controller.
749 * Note: domain is not supposed to receive balloon requests when
750 * they are above maximum reservation, but better be safe than
751 * sorry.
752 */
753 res_max = BALLOON_PAGES_TO_KB(xenmem_get_maxreservation());
754 if (newval < res_min || newval > res_max) {
755#if BALLOONDEBUG
756 device_printf(balloon_sc->sc_dev,
757 "new value out of bounds: %"PRIu64"\n", newval);
758 device_printf(balloon_sc->sc_dev,
759 "min %"PRIu64", max %"PRIu64"\n", res_min, res_max);
760#endif
761 return EPERM;
762 }
763
764 /*
765 * Write new value inside Xenstore. This will fire the memory/target
766 * watch handler, balloon_xenbus_watcher().
767 */
768 balloon_xenbus_write_target(newval);
769
770 return 0;
771}
772
773/* sysctl(9) nodes creation */
774static void
775sysctl_kern_xen_balloon_setup(struct balloon_xenbus_softc *sc)
776{
777 const struct sysctlnode *node = NULL;
778 struct sysctllog **clog = &sc->sc_log;
779
780 sysctl_createv(clog, 0, NULL, &node,
781 CTLFLAG_PERMANENT,
782 CTLTYPE_NODE, "machdep", NULL,
783 NULL, 0, NULL, 0,
784 CTL_MACHDEP, CTL_EOL);
785
786 sysctl_createv(clog, 0, &node, &node,
787 CTLFLAG_PERMANENT,
788 CTLTYPE_NODE, "xen",
789 SYSCTL_DESCR("Xen top level node"),
790 NULL, 0, NULL, 0,
791 CTL_CREATE, CTL_EOL);
792
793 sysctl_createv(clog, 0, &node, &node,
794 CTLFLAG_PERMANENT,
795 CTLTYPE_NODE, "balloon",
796 SYSCTL_DESCR("Balloon details"),
797 NULL, 0, NULL, 0,
798 CTL_CREATE, CTL_EOL);
799
800 sysctl_createv(clog, 0, &node, NULL,
801 CTLFLAG_PERMANENT | CTLFLAG_READONLY,
802 CTLTYPE_QUAD, "current",
803 SYSCTL_DESCR("Domain's current memory reservation from "
804 "hypervisor, in KiB."),
805 sysctl_kern_xen_balloon_current, 0, NULL, 0,
806 CTL_CREATE, CTL_EOL);
807
808 sysctl_createv(clog, 0, &node, NULL,
809 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
810 CTLTYPE_QUAD, "target",
811 SYSCTL_DESCR("Target memory reservation for domain, in KiB."),
812 sysctl_kern_xen_balloon_target, 0, NULL, 0,
813 CTL_CREATE, CTL_EOL);
814
815 sysctl_createv(clog, 0, &node, NULL,
816 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
817 CTLTYPE_QUAD, "min",
818 SYSCTL_DESCR("Minimum amount of memory the domain "
819 "reserves, in KiB."),
820 sysctl_kern_xen_balloon_min, 0, NULL, 0,
821 CTL_CREATE, CTL_EOL);
822
823 sysctl_createv(clog, 0, &node, NULL,
824 CTLFLAG_PERMANENT | CTLFLAG_READONLY,
825 CTLTYPE_QUAD, "max",
826 SYSCTL_DESCR("Maximum amount of memory the domain "
827 "can use, in KiB."),
828 sysctl_kern_xen_balloon_max, 0, NULL, 0,
829 CTL_CREATE, CTL_EOL);
830}
831