/*	$NetBSD: uvm_km.c,v 1.141 2016/07/27 16:45:00 maxv Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_kern.c   8.3 (Berkeley) 1/12/94
 * from: Id: uvm_km.c,v 1.1.2.14 1998/02/06 05:19:27 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_km.c: handle kernel memory allocation and management
 */

/*
 * overview of kernel memory management:
 *
 * the kernel virtual address space is mapped by "kernel_map."  kernel_map
 * starts at VM_MIN_KERNEL_ADDRESS and goes to VM_MAX_KERNEL_ADDRESS.
 * note that VM_MIN_KERNEL_ADDRESS is equal to vm_map_min(kernel_map).
 *
 * the kernel_map has several "submaps."  submaps can only appear in
 * the kernel_map (user processes can't use them).  submaps "take over"
 * the management of a sub-range of the kernel's address space.  submaps
 * are typically allocated at boot time and are never released.  kernel
 * virtual address space that is mapped by a submap is locked by the
 * submap's lock -- not the kernel_map's lock.
 *
 * thus, the useful feature of submaps is that they allow us to break
 * up the locking and protection of the kernel address space into smaller
 * chunks.
 *
 * the vm system has several standard kernel submaps/arenas, including:
 *   kmem_arena => used for kmem/pool (memoryallocators(9))
 *   pager_map => used to map "buf" structures into kernel space
 *   exec_map => used during exec to handle exec args
 *   etc...
 *
 * The kmem_arena is a "special submap", as it lives in a fixed map entry
 * within the kernel_map and is controlled by vmem(9).
 *
 * the kernel allocates its private memory out of special uvm_objects whose
 * reference count is set to UVM_OBJ_KERN (thus indicating that the objects
 * are "special" and never die).  all kernel objects should be thought of
 * as large, fixed-size, sparsely populated uvm_objects.  the size of each
 * kernel object is equal to the size of the kernel virtual address space
 * (i.e. the value "VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS").
 *
 * note that just because a kernel object spans the entire kernel virtual
 * address space doesn't mean that it has to be mapped into the entire space.
 * large chunks of a kernel object's space go unused either because
 * that area of kernel VM is unmapped, or there is some other type of
 * object mapped into that range (e.g. a vnode).  for a submap's kernel
 * object, the only parts of the object that can ever be populated are the
 * offsets that are managed by the submap.
 *
 * note that the "offset" in a kernel object is always the kernel virtual
 * address minus the VM_MIN_KERNEL_ADDRESS (aka vm_map_min(kernel_map)).
 * example:
 *   suppose VM_MIN_KERNEL_ADDRESS is 0xf8000000 and the kernel does a
 *   uvm_km_alloc(kernel_map, PAGE_SIZE) [allocate 1 wired down page in the
 *   kernel map].  if uvm_km_alloc returns virtual address 0xf8235000,
 *   then that means that the page at offset 0x235000 in kernel_object is
 *   mapped at 0xf8235000.
 *
 * kernel objects have one other special property: when the kernel virtual
 * memory mapping them is unmapped, the backing memory in the object is
 * freed right away.  this is done with the uvm_km_pgremove() function.
 * this has to be done because there is no backing store for kernel pages
 * and no need to save them after they are no longer referenced.
 *
 * Generic arenas:
 *
 * kmem_arena:
 *	Main arena controlling the kernel KVA used by other arenas.
 *
 * kmem_va_arena:
 *	Implements quantum caching in order to speed up allocations and
 *	reduce fragmentation.  The pool(9) (unless created with a custom
 *	meta-data allocator) and kmem(9) subsystems use this arena.
 *
 * Arenas for meta-data allocations are used by vmem(9) and pool(9).
 * These arenas cannot use the quantum cache.  However, kmem_va_meta_arena
 * compensates for this by importing larger chunks from kmem_arena.
 *
 * kmem_va_meta_arena:
 *	Space for meta-data.
 *
 * kmem_meta_arena:
 *	Imports from kmem_va_meta_arena.  Allocations from this arena are
 *	backed with pages.
 *
 * Arena stacking:
 *
 *	kmem_arena
 *		kmem_va_arena
 *		kmem_va_meta_arena
 *			kmem_meta_arena
 */
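
/*
 * Illustrative sketch (comment only, not compiled): the offset arithmetic
 * described above, for a hypothetical pageable allocation "kva" obtained
 * from kernel_map.  With uvm_kernel_object's vmobjlock held, the backing
 * page can be looked up at that offset, which is exactly what
 * uvm_km_pgremove() and uvm_km_check_empty() below rely on:
 *
 *	voff_t off = (voff_t)(kva - vm_map_min(kernel_map));
 *	struct vm_page *pg = uvm_pagelookup(uvm_kernel_object, off);
 */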

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_km.c,v 1.141 2016/07/27 16:45:00 maxv Exp $");

#include "opt_uvmhist.h"

#include "opt_kmempages.h"

#ifndef NKMEMPAGES
#define NKMEMPAGES 0
#endif

/*
 * Defaults for the lower and upper bounds of the kmem_arena page count.
 * Can be overridden by kernel config options.
 */
#ifndef NKMEMPAGES_MIN
#define NKMEMPAGES_MIN NKMEMPAGES_MIN_DEFAULT
#endif

#ifndef NKMEMPAGES_MAX
#define NKMEMPAGES_MAX NKMEMPAGES_MAX_DEFAULT
#endif


#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pool.h>
#include <sys/vmem.h>
#include <sys/vmem_impl.h>
#include <sys/kmem.h>

#include <uvm/uvm.h>

/*
 * global data structures
 */

struct vm_map *kernel_map = NULL;

/*
 * local data structures
 */

static struct vm_map		kernel_map_store;
static struct vm_map_entry	kernel_image_mapent_store;
static struct vm_map_entry	kernel_kmem_mapent_store;

int nkmempages = 0;
vaddr_t kmembase;
vsize_t kmemsize;

static struct vmem kmem_arena_store;
vmem_t *kmem_arena = NULL;
static struct vmem kmem_va_arena_store;
vmem_t *kmem_va_arena;

/*
 * kmeminit_nkmempages: calculate the size of kmem_arena.
 */
void
kmeminit_nkmempages(void)
{
	int npages;

	if (nkmempages != 0) {
		/*
		 * It's already been set (by us being here before);
		 * bail out now.
		 */
		return;
	}

#if defined(PMAP_MAP_POOLPAGE)
	npages = (physmem / 4);
#else
	npages = (physmem / 3) * 2;
#endif /* defined(PMAP_MAP_POOLPAGE) */

#ifndef NKMEMPAGES_MAX_UNLIMITED
	if (npages > NKMEMPAGES_MAX)
		npages = NKMEMPAGES_MAX;
#endif

	if (npages < NKMEMPAGES_MIN)
		npages = NKMEMPAGES_MIN;

	nkmempages = npages;
}
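
/*
 * Illustrative example (the values are made up): the bounds above default
 * to the machine-dependent NKMEMPAGES_MIN_DEFAULT/NKMEMPAGES_MAX_DEFAULT,
 * and can be overridden from a kernel config file, arriving here via
 * opt_kmempages.h:
 *
 *	options NKMEMPAGES_MIN=2048	# >= 8MB of kmem with 4kB pages
 *	options NKMEMPAGES_MAX=65536	# <= 256MB of kmem
 */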

/*
 * uvm_km_bootstrap: init kernel maps and objects to reflect reality (i.e.
 * KVM already allocated for text, data, bss, and static data structures).
 *
 * => KVM is defined by VM_MIN_KERNEL_ADDRESS/VM_MAX_KERNEL_ADDRESS.
 *    we assume that [vmin -> start] has already been allocated and that
 *    "end" is the end.
 */

void
uvm_km_bootstrap(vaddr_t start, vaddr_t end)
{
	bool kmem_arena_small;
	vaddr_t base = VM_MIN_KERNEL_ADDRESS;
	struct uvm_map_args args;
	int error;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist, "start=%#" PRIxVADDR " end=%#" PRIxVADDR,
	    start, end, 0,0);

	kmeminit_nkmempages();
	kmemsize = (vsize_t)nkmempages * PAGE_SIZE;
	kmem_arena_small = kmemsize < 64 * 1024 * 1024;

	UVMHIST_LOG(maphist, "kmemsize=%#" PRIxVSIZE, kmemsize, 0,0,0);

	/*
	 * next, init kernel memory objects.
	 */

	/* kernel_object: for pageable anonymous kernel memory */
	uvm_kernel_object = uao_create(VM_MAX_KERNEL_ADDRESS -
	    VM_MIN_KERNEL_ADDRESS, UAO_FLAG_KERNOBJ);

	/*
	 * init the map and reserve any space that might already
	 * have been allocated kernel space before installing.
	 */

	uvm_map_setup(&kernel_map_store, base, end, VM_MAP_PAGEABLE);
	kernel_map_store.pmap = pmap_kernel();
	if (start != base) {
		error = uvm_map_prepare(&kernel_map_store,
		    base, start - base,
		    NULL, UVM_UNKNOWN_OFFSET, 0,
		    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
		    UVM_ADV_RANDOM, UVM_FLAG_FIXED), &args);
		if (!error) {
			kernel_image_mapent_store.flags =
			    UVM_MAP_KERNEL | UVM_MAP_STATIC | UVM_MAP_NOMERGE;
			error = uvm_map_enter(&kernel_map_store, &args,
			    &kernel_image_mapent_store);
		}

		if (error)
			panic(
			    "uvm_km_bootstrap: could not reserve space for kernel");

		kmembase = args.uma_start + args.uma_size;
	} else {
		kmembase = base;
	}

	error = uvm_map_prepare(&kernel_map_store,
	    kmembase, kmemsize,
	    NULL, UVM_UNKNOWN_OFFSET, 0,
	    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_FIXED), &args);
	if (!error) {
		kernel_kmem_mapent_store.flags =
		    UVM_MAP_KERNEL | UVM_MAP_STATIC | UVM_MAP_NOMERGE;
		error = uvm_map_enter(&kernel_map_store, &args,
		    &kernel_kmem_mapent_store);
	}

	if (error)
		panic("uvm_km_bootstrap: could not reserve kernel kmem");

	/*
	 * install!
	 */

	kernel_map = &kernel_map_store;

	pool_subsystem_init();

	kmem_arena = vmem_init(&kmem_arena_store, "kmem",
	    kmembase, kmemsize, PAGE_SIZE, NULL, NULL, NULL,
	    0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);
#ifdef PMAP_GROWKERNEL
	/*
	 * kmem_arena VA allocations happen independently of uvm_map.
	 * grow kernel to accommodate the kmem_arena.
	 */
	if (uvm_maxkaddr < kmembase + kmemsize) {
		uvm_maxkaddr = pmap_growkernel(kmembase + kmemsize);
		KASSERTMSG(uvm_maxkaddr >= kmembase + kmemsize,
		    "%#" PRIxVADDR " %#" PRIxVADDR " %#" PRIxVSIZE,
		    uvm_maxkaddr, kmembase, kmemsize);
	}
#endif

	vmem_subsystem_init(kmem_arena);

	UVMHIST_LOG(maphist, "kmem vmem created (base=%#" PRIxVADDR
	    ", size=%#" PRIxVSIZE ")", kmembase, kmemsize, 0,0);

	kmem_va_arena = vmem_init(&kmem_va_arena_store, "kva",
	    0, 0, PAGE_SIZE, vmem_alloc, vmem_free, kmem_arena,
	    (kmem_arena_small ? 4 : VMEM_QCACHE_IDX_MAX) * PAGE_SIZE,
	    VM_NOSLEEP, IPL_VM);

	UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
}
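
/*
 * Rough picture of kernel_map right after uvm_km_bootstrap() (purely
 * illustrative; addresses are machine-dependent, and the kernel image
 * entry only exists when start != base):
 *
 *	base (= VM_MIN_KERNEL_ADDRESS)                              end
 *	+--------------------+---------------------+-------------------+
 *	| kernel image       | kmem_arena          | rest of the map   |
 *	| (static map entry) | (static map entry,  | (submaps, wired   |
 *	|                    |  carved up by vmem) | allocations, ...) |
 *	+--------------------+---------------------+-------------------+
 *	                     kmembase              kmembase + kmemsize
 */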

/*
 * uvm_km_init: init the kernel maps virtual memory caches
 * and start the pool/kmem allocator.
 */
void
uvm_km_init(void)
{
	kmem_init();
}

/*
 * uvm_km_suballoc: allocate a submap in the kernel map.  once a submap
 * is allocated all references to that area of VM must go through it.  this
 * allows the locking of VAs in kernel_map to be broken up into regions.
 *
 * => if `fixed' is true, *vmin specifies where the region described
 *    by the submap must start
 * => if submap is non NULL we use that as the submap, otherwise we
 *    alloc a new map
 */

struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *vmin /* IN/OUT */,
    vaddr_t *vmax /* OUT */, vsize_t size, int flags, bool fixed,
    struct vm_map *submap)
{
	int mapflags = UVM_FLAG_NOMERGE | (fixed ? UVM_FLAG_FIXED : 0);
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	KASSERT(vm_map_pmap(map) == pmap_kernel());

	size = round_page(size);	/* round up to pagesize */

	/*
	 * first allocate a blank spot in the parent map
	 */

	if (uvm_map(map, vmin, size, NULL, UVM_UNKNOWN_OFFSET, 0,
	    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
	    UVM_ADV_RANDOM, mapflags)) != 0) {
		panic("%s: unable to allocate space in parent map", __func__);
	}

	/*
	 * set VM bounds (vmin is filled in by uvm_map)
	 */

	*vmax = *vmin + size;

	/*
	 * add references to pmap and create or init the submap
	 */

	pmap_reference(vm_map_pmap(map));
	if (submap == NULL) {
		submap = kmem_alloc(sizeof(*submap), KM_SLEEP);
		if (submap == NULL)
			panic("uvm_km_suballoc: unable to create submap");
	}
	uvm_map_setup(submap, *vmin, *vmax, flags);
	submap->pmap = vm_map_pmap(map);

	/*
	 * now let uvm_map_submap plug it in...
	 */

	if (uvm_map_submap(map, *vmin, *vmax, submap) != 0)
		panic("uvm_km_suballoc: submap allocation failed");

	return(submap);
}
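
/*
 * Illustrative sketch: how a subsystem typically creates its submap at
 * boot time (modeled on the pager_map mentioned in the overview; the
 * variable names and the PAGER_MAP_SIZE constant are assumptions about
 * the call site, which lives elsewhere):
 *
 *	vaddr_t sva, eva;
 *	struct vm_map *pager_map;
 *
 *	pager_map = uvm_km_suballoc(kernel_map, &sva, &eva,
 *	    PAGER_MAP_SIZE, 0, false, NULL);
 *
 * from then on, KVA in [sva, eva) is allocated from (and locked by)
 * pager_map instead of kernel_map.
 */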

/*
 * uvm_km_pgremove: remove pages from a kernel uvm_object and KVA.
 */

void
uvm_km_pgremove(vaddr_t startva, vaddr_t endva)
{
	struct uvm_object * const uobj = uvm_kernel_object;
	const voff_t start = startva - vm_map_min(kernel_map);
	const voff_t end = endva - vm_map_min(kernel_map);
	struct vm_page *pg;
	voff_t curoff, nextoff;
	int swpgonlydelta = 0;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	KASSERT(VM_MIN_KERNEL_ADDRESS <= startva);
	KASSERT(startva < endva);
	KASSERT(endva <= VM_MAX_KERNEL_ADDRESS);

	mutex_enter(uobj->vmobjlock);
	pmap_remove(pmap_kernel(), startva, endva);
	for (curoff = start; curoff < end; curoff = nextoff) {
		nextoff = curoff + PAGE_SIZE;
		pg = uvm_pagelookup(uobj, curoff);
		if (pg != NULL && pg->flags & PG_BUSY) {
			pg->flags |= PG_WANTED;
			UVM_UNLOCK_AND_WAIT(pg, uobj->vmobjlock, 0,
			    "km_pgrm", 0);
			mutex_enter(uobj->vmobjlock);
			nextoff = curoff;
			continue;
		}

		/*
		 * free the swap slot, then the page.
		 */

		if (pg == NULL &&
		    uao_find_swslot(uobj, curoff >> PAGE_SHIFT) > 0) {
			swpgonlydelta++;
		}
		uao_dropswap(uobj, curoff >> PAGE_SHIFT);
		if (pg != NULL) {
			mutex_enter(&uvm_pageqlock);
			uvm_pagefree(pg);
			mutex_exit(&uvm_pageqlock);
		}
	}
	mutex_exit(uobj->vmobjlock);

	if (swpgonlydelta > 0) {
		mutex_enter(&uvm_swap_data_lock);
		KASSERT(uvmexp.swpgonly >= swpgonlydelta);
		uvmexp.swpgonly -= swpgonlydelta;
		mutex_exit(&uvm_swap_data_lock);
	}
}


/*
 * uvm_km_pgremove_intrsafe: like uvm_km_pgremove(), but for non-object-backed
 * regions.
 *
 * => when you unmap a part of anonymous kernel memory you want to toss
 *    the pages right away.  (this is called from uvm_unmap_...).
 * => none of the pages will ever be busy, and none of them will ever
 *    be on the active or inactive queues (because they have no object).
 */

void
uvm_km_pgremove_intrsafe(struct vm_map *map, vaddr_t start, vaddr_t end)
{
#define __PGRM_BATCH 16
	struct vm_page *pg;
	paddr_t pa[__PGRM_BATCH];
	int npgrm, i;
	vaddr_t va, batch_vastart;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	KASSERT(VM_MAP_IS_KERNEL(map));
	KASSERTMSG(vm_map_min(map) <= start,
	    "vm_map_min(map) [%#" PRIxVADDR "] <= start [%#" PRIxVADDR "]"
	    " (size=%#" PRIxVSIZE ")",
	    vm_map_min(map), start, end - start);
	KASSERT(start < end);
	KASSERT(end <= vm_map_max(map));

	for (va = start; va < end;) {
		batch_vastart = va;
		/* create a batch of at most __PGRM_BATCH pages to free */
		for (i = 0;
		     i < __PGRM_BATCH && va < end;
		     va += PAGE_SIZE) {
			if (!pmap_extract(pmap_kernel(), va, &pa[i])) {
				continue;
			}
			i++;
		}
		npgrm = i;
		/* now remove the mappings */
		pmap_kremove(batch_vastart, va - batch_vastart);
		/* and free the pages */
		for (i = 0; i < npgrm; i++) {
			pg = PHYS_TO_VM_PAGE(pa[i]);
			KASSERT(pg);
			KASSERT(pg->uobject == NULL && pg->uanon == NULL);
			KASSERT((pg->flags & PG_BUSY) == 0);
			uvm_pagefree(pg);
		}
	}
#undef __PGRM_BATCH
}

#if defined(DEBUG)
void
uvm_km_check_empty(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	struct vm_page *pg;
	vaddr_t va;
	paddr_t pa;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	KDASSERT(VM_MAP_IS_KERNEL(map));
	KDASSERT(vm_map_min(map) <= start);
	KDASSERT(start < end);
	KDASSERT(end <= vm_map_max(map));

	for (va = start; va < end; va += PAGE_SIZE) {
		if (pmap_extract(pmap_kernel(), va, &pa)) {
			panic("uvm_km_check_empty: va %p has pa 0x%llx",
			    (void *)va, (long long)pa);
		}
		mutex_enter(uvm_kernel_object->vmobjlock);
		pg = uvm_pagelookup(uvm_kernel_object,
		    va - vm_map_min(kernel_map));
		mutex_exit(uvm_kernel_object->vmobjlock);
		if (pg) {
			panic("uvm_km_check_empty: "
			    "has page hashed at %p", (const void *)va);
		}
	}
}
#endif /* defined(DEBUG) */

/*
 * uvm_km_alloc: allocate an area of kernel memory.
 *
 * => NOTE: we can return 0 even if we can wait, if there is not enough
 *    free VM space in the map... the caller should be prepared to handle
 *    this case.
 * => we return the KVA of the memory allocated
 */

vaddr_t
uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
{
	vaddr_t kva, loopva;
	vaddr_t offset;
	vsize_t loopsize;
	struct vm_page *pg;
	struct uvm_object *obj;
	int pgaflags;
	vm_prot_t prot, vaprot;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	KASSERT(vm_map_pmap(map) == pmap_kernel());
	KASSERT((flags & UVM_KMF_TYPEMASK) == UVM_KMF_WIRED ||
	    (flags & UVM_KMF_TYPEMASK) == UVM_KMF_PAGEABLE ||
	    (flags & UVM_KMF_TYPEMASK) == UVM_KMF_VAONLY);
	KASSERT((flags & UVM_KMF_VAONLY) != 0 || (flags & UVM_KMF_COLORMATCH) == 0);

	/*
	 * setup for call
	 */

	kva = vm_map_min(map);	/* hint */
	size = round_page(size);
	obj = (flags & UVM_KMF_PAGEABLE) ? uvm_kernel_object : NULL;
	UVMHIST_LOG(maphist,"  (map=0x%x, obj=0x%x, size=0x%x, flags=%d)",
	    map, obj, size, flags);

	/*
	 * allocate some virtual space
	 */

	vaprot = (flags & UVM_KMF_EXEC) ? UVM_PROT_ALL : UVM_PROT_RW;
	if (__predict_false(uvm_map(map, &kva, size, obj, UVM_UNKNOWN_OFFSET,
	    align, UVM_MAPFLAG(vaprot, UVM_PROT_ALL, UVM_INH_NONE,
	    UVM_ADV_RANDOM,
	    (flags & (UVM_KMF_TRYLOCK | UVM_KMF_NOWAIT | UVM_KMF_WAITVA
	     | UVM_KMF_COLORMATCH)))) != 0)) {
		UVMHIST_LOG(maphist, "<- done (no VM)",0,0,0,0);
		return(0);
	}

	/*
	 * if all we wanted was VA, return now
	 */

	if (flags & (UVM_KMF_VAONLY | UVM_KMF_PAGEABLE)) {
		UVMHIST_LOG(maphist,"<- done valloc (kva=0x%x)", kva,0,0,0);
		return(kva);
	}

	/*
	 * recover object offset from virtual address
	 */

	offset = kva - vm_map_min(kernel_map);
	UVMHIST_LOG(maphist, "  kva=0x%x, offset=0x%x", kva, offset,0,0);

	/*
	 * now allocate and map in the memory... note that we are the only ones
	 * who should ever get a handle on this area of VM.
	 */

	loopva = kva;
	loopsize = size;

	pgaflags = UVM_FLAG_COLORMATCH;
	if (flags & UVM_KMF_NOWAIT)
		pgaflags |= UVM_PGA_USERESERVE;
	if (flags & UVM_KMF_ZERO)
		pgaflags |= UVM_PGA_ZERO;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (flags & UVM_KMF_EXEC)
		prot |= VM_PROT_EXECUTE;
	while (loopsize) {
		KASSERTMSG(!pmap_extract(pmap_kernel(), loopva, NULL),
		    "loopva=%#" PRIxVADDR, loopva);

		pg = uvm_pagealloc_strat(NULL, offset, NULL, pgaflags,
#ifdef UVM_KM_VMFREELIST
		    UVM_PGA_STRAT_ONLY, UVM_KM_VMFREELIST
#else
		    UVM_PGA_STRAT_NORMAL, 0
#endif
		    );

		/*
		 * out of memory?
		 */

		if (__predict_false(pg == NULL)) {
			if ((flags & UVM_KMF_NOWAIT) ||
			    ((flags & UVM_KMF_CANFAIL) && !uvm_reclaimable())) {
				/* free everything! */
				uvm_km_free(map, kva, size,
				    flags & UVM_KMF_TYPEMASK);
				return (0);
			} else {
				uvm_wait("km_getwait2");	/* sleep here */
				continue;
			}
		}

		pg->flags &= ~PG_BUSY;	/* new page */
		UVM_PAGE_OWN(pg, NULL);

		/*
		 * map it in
		 */

		pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg),
		    prot, PMAP_KMPAGE);
		loopva += PAGE_SIZE;
		offset += PAGE_SIZE;
		loopsize -= PAGE_SIZE;
	}

	pmap_update(pmap_kernel());

	UVMHIST_LOG(maphist,"<- done (kva=0x%x)", kva,0,0,0);
	return(kva);
}
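
/*
 * Illustrative sketch of the usual wired-allocation pattern (error
 * handling shown for the UVM_KMF_CANFAIL case; "size" is a caller-chosen
 * byte count, rounded up to a page multiple internally):
 *
 *	vaddr_t va;
 *
 *	va = uvm_km_alloc(kernel_map, size, 0,
 *	    UVM_KMF_WIRED | UVM_KMF_ZERO | UVM_KMF_CANFAIL);
 *	if (va == 0)
 *		return ENOMEM;
 *	...
 *	uvm_km_free(kernel_map, va, size, UVM_KMF_WIRED);
 *
 * note that the type flag passed to uvm_km_free() must match the type
 * used for the allocation.
 */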

/*
 * uvm_km_protect: change the protection of an allocated area
 */

int
uvm_km_protect(struct vm_map *map, vaddr_t addr, vsize_t size, vm_prot_t prot)
{
	return uvm_map_protect(map, addr, addr + round_page(size), prot, false);
}

/*
 * uvm_km_free: free an area of kernel memory
 */

void
uvm_km_free(struct vm_map *map, vaddr_t addr, vsize_t size, uvm_flag_t flags)
{
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	KASSERT((flags & UVM_KMF_TYPEMASK) == UVM_KMF_WIRED ||
	    (flags & UVM_KMF_TYPEMASK) == UVM_KMF_PAGEABLE ||
	    (flags & UVM_KMF_TYPEMASK) == UVM_KMF_VAONLY);
	KASSERT((addr & PAGE_MASK) == 0);
	KASSERT(vm_map_pmap(map) == pmap_kernel());

	size = round_page(size);

	if (flags & UVM_KMF_PAGEABLE) {
		uvm_km_pgremove(addr, addr + size);
	} else if (flags & UVM_KMF_WIRED) {
		/*
		 * Note: uvm_km_pgremove_intrsafe() extracts the mappings,
		 * so it must run before the map entry is removed below.
		 * See the comment below about KVA visibility.
		 */
		uvm_km_pgremove_intrsafe(map, addr, addr + size);
	}

	/*
	 * Note: uvm_unmap_remove() calls pmap_update() for us, before
	 * the KVA becomes globally available.
	 */

	uvm_unmap1(map, addr, addr + size, UVM_FLAG_VAONLY);
}

/* Sanity; must specify both or none. */
#if (defined(PMAP_MAP_POOLPAGE) || defined(PMAP_UNMAP_POOLPAGE)) && \
    (!defined(PMAP_MAP_POOLPAGE) || !defined(PMAP_UNMAP_POOLPAGE))
#error Must specify MAP and UNMAP together.
#endif
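
/*
 * Illustrative sketch: on ports with a direct physical-to-virtual mapping,
 * these hooks reduce single-page allocations to address arithmetic,
 * avoiding both KVA allocation and pmap_kenter_pa()/pmap_kremove().  A
 * machine-dependent header might define them along these lines
 * (hypothetical; the actual definitions vary per port):
 *
 *	#define PMAP_MAP_POOLPAGE(pa)	PMAP_DIRECT_MAP(pa)
 *	#define PMAP_UNMAP_POOLPAGE(va)	PMAP_DIRECT_UNMAP(va)
 */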

int
uvm_km_kmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags,
    vmem_addr_t *addr)
{
	struct vm_page *pg;
	vmem_addr_t va;
	int rc;
	vaddr_t loopva;
	vsize_t loopsize;

	size = round_page(size);

#if defined(PMAP_MAP_POOLPAGE)
	if (size == PAGE_SIZE) {
again:
#ifdef PMAP_ALLOC_POOLPAGE
		pg = PMAP_ALLOC_POOLPAGE((flags & VM_SLEEP) ?
		    0 : UVM_PGA_USERESERVE);
#else
		pg = uvm_pagealloc(NULL, 0, NULL,
		    (flags & VM_SLEEP) ? 0 : UVM_PGA_USERESERVE);
#endif /* PMAP_ALLOC_POOLPAGE */
		if (__predict_false(pg == NULL)) {
			if (flags & VM_SLEEP) {
				uvm_wait("plpg");
				goto again;
			}
			return ENOMEM;
		}
		va = PMAP_MAP_POOLPAGE(VM_PAGE_TO_PHYS(pg));
		if (__predict_false(va == 0)) {
			uvm_pagefree(pg);
			return ENOMEM;
		}
		*addr = va;
		return 0;
	}
#endif /* PMAP_MAP_POOLPAGE */

	rc = vmem_alloc(vm, size, flags, &va);
	if (rc != 0)
		return rc;

#ifdef PMAP_GROWKERNEL
	/*
	 * These VA allocations happen independently of uvm_map,
	 * so this allocation must not extend beyond the current limit.
	 */
	KASSERTMSG(uvm_maxkaddr >= va + size,
	    "%#" PRIxVADDR " %#" PRIxPTR " %#zx",
	    uvm_maxkaddr, va, size);
#endif

	loopva = va;
	loopsize = size;

	while (loopsize) {
#ifdef DIAGNOSTIC
		paddr_t pa;
#endif
		KASSERTMSG(!pmap_extract(pmap_kernel(), loopva, &pa),
		    "loopva=%#" PRIxVADDR " loopsize=%#" PRIxVSIZE
		    " pa=%#" PRIxPADDR " vmem=%p",
		    loopva, loopsize, pa, vm);

		pg = uvm_pagealloc(NULL, loopva, NULL,
		    UVM_FLAG_COLORMATCH
		    | ((flags & VM_SLEEP) ? 0 : UVM_PGA_USERESERVE));
		if (__predict_false(pg == NULL)) {
			if (flags & VM_SLEEP) {
				uvm_wait("plpg");
				continue;
			} else {
				uvm_km_pgremove_intrsafe(kernel_map, va,
				    va + size);
				vmem_free(vm, va, size);
				return ENOMEM;
			}
		}

		pg->flags &= ~PG_BUSY;	/* new page */
		UVM_PAGE_OWN(pg, NULL);
		pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg),
		    VM_PROT_READ|VM_PROT_WRITE, PMAP_KMPAGE);

		loopva += PAGE_SIZE;
		loopsize -= PAGE_SIZE;
	}
	pmap_update(pmap_kernel());

	*addr = va;

	return 0;
}

void
uvm_km_kmem_free(vmem_t *vm, vmem_addr_t addr, size_t size)
{

	size = round_page(size);
#if defined(PMAP_UNMAP_POOLPAGE)
	if (size == PAGE_SIZE) {
		paddr_t pa;

		pa = PMAP_UNMAP_POOLPAGE(addr);
		uvm_pagefree(PHYS_TO_VM_PAGE(pa));
		return;
	}
#endif /* PMAP_UNMAP_POOLPAGE */
	uvm_km_pgremove_intrsafe(kernel_map, addr, addr + size);
	pmap_update(pmap_kernel());

	vmem_free(vm, addr, size);
}

bool
uvm_km_va_starved_p(void)
{
	vmem_size_t total;
	vmem_size_t free;

	if (kmem_arena == NULL)
		return false;

	total = vmem_size(kmem_arena, VMEM_ALLOC|VMEM_FREE);
	free = vmem_size(kmem_arena, VMEM_FREE);

	return (free < (total / 10));
}
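
/*
 * Illustrative sketch (hypothetical call site): callers such as the page
 * daemon can use this predicate to treat KVA shortage, rather than only
 * physical memory shortage, as a reason to start reclaiming:
 *
 *	if (uvm_km_va_starved_p()) {
 *		... less than 10% of kmem_arena is free: drain pools
 *		... and caches so that their KVA is returned
 *	}
 */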