/*	$NetBSD: uvm_emap.c,v 1.11 2014/11/27 14:25:01 uebayasi Exp $	*/

/*-
 * Copyright (c) 2009, 2010 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Mindaugas Rasiukevicius and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * UVM ephemeral mapping interface.
 */
/*
 * Overview:
 *
 * On multiprocessor systems, frequent use of pmap_kenter_pa/pmap_kremove
 * for ephemeral mappings is not desirable, as it is likely to trigger
 * TLB flush IPIs, since pmap_kernel() is shared among all LWPs.  This
 * interface can be used instead, to reduce the number of IPIs.
 *
 * For a single-page mapping, PMAP_DIRECT_MAP is likely a better choice
 * if available (__HAVE_DIRECT_MAP).
 */

/*
 * How to use:
 *
 * Map pages at the address:
 *
 *	uvm_emap_enter(va, pgs, npages);
 *	gen = uvm_emap_produce();
 *
 * Read the pages via the mapping:
 *
 *	uvm_emap_consume(gen);
 *	some_access(va);
 *
 * When finished with the mapping:
 *
 *	uvm_emap_remove(va, len);
 */
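
/*
 * Putting it together, a full lifecycle of a window might look as
 * follows.  This is only a sketch: error handling is omitted, and
 * "pgs", "npages" and some_access() stand in for the caller's own
 * pages and access logic.
 *
 *	vaddr_t va = uvm_emap_alloc(npages * PAGE_SIZE, true);
 *
 *	uvm_emap_enter(va, pgs, npages);
 *	gen = uvm_emap_produce();
 *
 *	uvm_emap_consume(gen);
 *	some_access(va);
 *
 *	uvm_emap_remove(va, npages * PAGE_SIZE);
 *	uvm_emap_free(va, npages * PAGE_SIZE);
 */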

/*
 * Notes for pmap developers:
 *
 * Generic (more expensive) stubs are implemented for architectures which
 * do not support emap.
 *
 * Note that uvm_emap_update() is called from the lower pmap(9) layer,
 * while the other functions call into pmap(9).  The typical update
 * pattern in a pmap is:
 *
 *	u_int gen = uvm_emap_gen_return();
 *	tlbflush();
 *	uvm_emap_update(gen);
 *
 * It is also used from IPI context, therefore the functions must be safe.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_emap.c,v 1.11 2014/11/27 14:25:01 uebayasi Exp $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/lwp.h>
#include <sys/vmem.h>
#include <sys/types.h>

#include <uvm/uvm.h>
#include <uvm/uvm_extern.h>

/* XXX: Arbitrary. */
#ifdef _LP64
#define	UVM_EMAP_SIZE		(128 * 1024 * 1024)	/* 128 MB */
#else
#define	UVM_EMAP_SIZE		(32 * 1024 * 1024)	/* 32 MB */
#endif

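/*
 * The global generation counter occupies a cache-line-sized,
 * cache-line-aligned block of its own, so that updates to it do not
 * false-share with neighbouring data.  Only element zero is used.
 */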
static u_int		_uvm_emap_gen[COHERENCY_UNIT / sizeof(u_int)]
    __aligned(COHERENCY_UNIT);

#define	uvm_emap_gen	(_uvm_emap_gen[0])

u_int			uvm_emap_size = UVM_EMAP_SIZE;
static vaddr_t		uvm_emap_va;
static vmem_t *		uvm_emap_vmem;

/*
 * uvm_emap_sysinit: initialize the subsystem.
 */
void
uvm_emap_sysinit(void)
{
	struct uvm_cpu *ucpu;
	/* size_t qmax; */
	u_int i;

	uvm_emap_size = roundup(uvm_emap_size, PAGE_SIZE);
#if 0
	qmax = 16 * PAGE_SIZE;
	uvm_emap_va = uvm_km_alloc(kernel_map, uvm_emap_size, 0,
	    UVM_KMF_VAONLY | UVM_KMF_WAITVA);
	if (uvm_emap_va == 0) {
		panic("uvm_emap_sysinit: KVA allocation failed");
	}

	uvm_emap_vmem = vmem_create("emap", uvm_emap_va, uvm_emap_size,
	    PAGE_SIZE, NULL, NULL, NULL, qmax, VM_SLEEP, IPL_NONE);
	if (uvm_emap_vmem == NULL) {
		panic("uvm_emap_sysinit: vmem creation failed");
	}
#else
	uvm_emap_va = 0;
	uvm_emap_vmem = NULL;
#endif
	/* Initial generation value is 1. */
	uvm_emap_gen = 1;
	for (i = 0; i < maxcpus; i++) {
		ucpu = uvm.cpus[i];
		if (ucpu != NULL) {
			ucpu->emap_gen = 1;
		}
	}
}

/*
 * uvm_emap_alloc: allocate a window.
 */
vaddr_t
uvm_emap_alloc(vsize_t size, bool waitok)
{
	vmem_addr_t addr;

	KASSERT(size > 0);
	KASSERT(round_page(size) == size);

	if (vmem_alloc(uvm_emap_vmem, size,
	    VM_INSTANTFIT | (waitok ? VM_SLEEP : VM_NOSLEEP), &addr) == 0)
		return (vaddr_t)addr;

	return (vaddr_t)0;
}

/*
 * uvm_emap_free: free a window.
 */
void
uvm_emap_free(vaddr_t va, size_t size)
{

	KASSERT(va >= uvm_emap_va);
	KASSERT(size <= uvm_emap_size);
	KASSERT(va + size <= uvm_emap_va + uvm_emap_size);

	vmem_free(uvm_emap_vmem, va, size);
}

#ifdef __HAVE_PMAP_EMAP

/*
 * uvm_emap_enter: enter a new mapping, without TLB flush.
 */
void
uvm_emap_enter(vaddr_t va, struct vm_page **pgs, u_int npages)
{
	paddr_t pa;
	u_int n;

	for (n = 0; n < npages; n++, va += PAGE_SIZE) {
		pa = VM_PAGE_TO_PHYS(pgs[n]);
		pmap_emap_enter(va, pa, VM_PROT_READ);
	}
}

/*
 * uvm_emap_remove: remove a mapping.
 */
void
uvm_emap_remove(vaddr_t sva, vsize_t len)
{

	pmap_emap_remove(sva, len);
}

/*
 * uvm_emap_gen_return: get the global generation number.
 *
 * => can be called from an IPI handler, therefore it must be safe.
 */
u_int
uvm_emap_gen_return(void)
{
	u_int gen;

	gen = uvm_emap_gen;
	if (__predict_false(gen == UVM_EMAP_INACTIVE)) {
		/*
		 * Instead of looping, just increment it on our side.
		 * Another thread could race and increment it again,
		 * but that has no negative effect.
		 */
		gen = atomic_inc_uint_nv(&uvm_emap_gen);
	}
	KASSERT(gen != UVM_EMAP_INACTIVE);
	return gen;
}

/*
 * uvm_emap_switch: if the CPU is 'behind' the LWP in emap visibility,
 * perform a TLB flush and thus update the local view.  Its main purpose
 * is to handle kernel preemption while an emap is in use.
 *
 * => called from mi_switch(), when an LWP returns after blocking or
 *    being preempted.
 */
void
uvm_emap_switch(lwp_t *l)
{
	struct uvm_cpu *ucpu;
	u_int curgen, gen;

	KASSERT(kpreempt_disabled());

	/* If the LWP did not use an emap, then there is nothing to do. */
	if (__predict_true(l->l_emap_gen == UVM_EMAP_INACTIVE)) {
		return;
	}

	/*
	 * No need to synchronise if the generation number of the current
	 * CPU is newer than that of this LWP.
	 *
	 * This test assumes two's complement arithmetic and allows
	 * ~2B missed updates before it will produce bad results.
	 */
	ucpu = curcpu()->ci_data.cpu_uvm;
	curgen = ucpu->emap_gen;
	gen = l->l_emap_gen;
	if (__predict_true((signed int)(curgen - gen) >= 0)) {
		return;
	}

	/*
	 * See the comments in uvm_emap_consume() about memory
	 * barriers and race conditions.
	 */
	curgen = uvm_emap_gen_return();
	pmap_emap_sync(false);
	ucpu->emap_gen = curgen;
}

/*
 * uvm_emap_consume: update the current CPU and LWP to the given generation
 * of the emap.  In the case of LWP migration to a different CPU after
 * blocking or preemption, uvm_emap_switch() will synchronise.
 *
 * => may be called from both interrupt and thread context.
 */
void
uvm_emap_consume(u_int gen)
{
	struct cpu_info *ci;
	struct uvm_cpu *ucpu;
	lwp_t *l = curlwp;
	u_int curgen;

	if (gen == UVM_EMAP_INACTIVE) {
		return;
	}

	/*
	 * No need to synchronise if the generation number of the current
	 * CPU is newer than that of this LWP.
	 *
	 * This test assumes two's complement arithmetic and allows
	 * ~2B missed updates before it will produce bad results.
	 */
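	/*
	 * For example, even after the counter wraps: with a CPU
	 * generation of 1 and an LWP generation of 0xffffffff,
	 * (signed int)(1 - 0xffffffff) == 2 >= 0, so the CPU is
	 * still correctly treated as up to date.
	 */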
	kpreempt_disable();
	ci = l->l_cpu;
	ucpu = ci->ci_data.cpu_uvm;
	if (__predict_true((signed int)(ucpu->emap_gen - gen) >= 0)) {
		l->l_emap_gen = ucpu->emap_gen;
		kpreempt_enable();
		return;
	}

	/*
	 * Record the current generation _before_ issuing the TLB flush.
	 * No memory barrier is needed before the read, as fetching a
	 * stale value for uvm_emap_gen is not a problem.
	 *
	 * pmap_emap_sync() must implicitly perform a full memory barrier,
	 * which prevents us from fetching a value from after the TLB flush
	 * has occurred (which would be bad).
	 *
	 * We can race with an interrupt on the current CPU updating the
	 * counter to a newer value.  This could cause us to set a stale
	 * value into ucpu->emap_gen, overwriting a newer update from the
	 * interrupt.  However, it does not matter, since:
	 *	(1) interrupts always run to completion or block;
	 *	(2) interrupts will only ever install a newer value; and
	 *	(3) we will roll the value forward later.
	 */
	curgen = uvm_emap_gen_return();
	pmap_emap_sync(true);
	ucpu->emap_gen = curgen;
	l->l_emap_gen = curgen;
	KASSERT((signed int)(curgen - gen) >= 0);
	kpreempt_enable();
}

/*
 * uvm_emap_produce: increment emap generation counter.
 *
 * => pmap updates must be globally visible.
 * => caller must have already entered mappings.
 * => may be called from both interrupt and thread context.
 */
u_int
uvm_emap_produce(void)
{
	u_int gen;
again:
	gen = atomic_inc_uint_nv(&uvm_emap_gen);
	if (__predict_false(gen == UVM_EMAP_INACTIVE)) {
		goto again;
	}
	return gen;
}

/*
 * uvm_emap_update: update the emap generation number for the current CPU.
 *
 * This function is called by MD code (e.g. pmap) to take advantage of TLB
 * flushes initiated for other reasons, which sync the emap as a side
 * effect.  Note that the generation number must be taken _before_ the
 * actual TLB flush, to avoid racing with a newly produced number.
 *
 * => can be called from an IPI handler, therefore it must be safe.
 * => should be called _after_ the TLB flush.
 * => the emap generation number should be taken _before_ the TLB flush.
 * => must be called with preemption disabled.
 */
void
uvm_emap_update(u_int gen)
{
	struct uvm_cpu *ucpu;

	/*
	 * See the comments in uvm_emap_consume() about memory barriers
	 * and race conditions.  The store is atomic as long as emap_gen
	 * is word-sized.
	 */
	CTASSERT(sizeof(ucpu->emap_gen) == sizeof(int));
	/* XXX: KASSERT(kpreempt_disabled()); */

	ucpu = curcpu()->ci_data.cpu_uvm;
	ucpu->emap_gen = gen;
}

#else

/*
 * Stubs for architectures which do not support emap.
 */

void
uvm_emap_enter(vaddr_t va, struct vm_page **pgs, u_int npages)
{
	paddr_t pa;
	u_int n;

	for (n = 0; n < npages; n++, va += PAGE_SIZE) {
		pa = VM_PAGE_TO_PHYS(pgs[n]);
		pmap_kenter_pa(va, pa, VM_PROT_READ, 0);
	}
	pmap_update(pmap_kernel());
}

void
uvm_emap_remove(vaddr_t sva, vsize_t len)
{

	pmap_kremove(sva, len);
	pmap_update(pmap_kernel());
}

#endif