1 | /* $NetBSD: cpu.c,v 1.104 2016/07/07 06:55:40 msaitoh Exp $ */ |
2 | /* NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp */ |
3 | |
4 | /*- |
5 | * Copyright (c) 2000 The NetBSD Foundation, Inc. |
6 | * Copyright (c) 2002, 2006, 2007 YAMAMOTO Takashi, |
7 | * All rights reserved. |
8 | * |
9 | * This code is derived from software contributed to The NetBSD Foundation |
10 | * by RedBack Networks Inc. |
11 | * |
12 | * Author: Bill Sommerfeld |
13 | * |
14 | * Redistribution and use in source and binary forms, with or without |
15 | * modification, are permitted provided that the following conditions |
16 | * are met: |
17 | * 1. Redistributions of source code must retain the above copyright |
18 | * notice, this list of conditions and the following disclaimer. |
19 | * 2. Redistributions in binary form must reproduce the above copyright |
20 | * notice, this list of conditions and the following disclaimer in the |
21 | * documentation and/or other materials provided with the distribution. |
22 | * |
23 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
24 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
25 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
26 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
27 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
28 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
29 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
30 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
31 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
32 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | */ |
35 | |
36 | /* |
37 | * Copyright (c) 1999 Stefan Grefen |
38 | * |
39 | * Redistribution and use in source and binary forms, with or without |
40 | * modification, are permitted provided that the following conditions |
41 | * are met: |
42 | * 1. Redistributions of source code must retain the above copyright |
43 | * notice, this list of conditions and the following disclaimer. |
44 | * 2. Redistributions in binary form must reproduce the above copyright |
45 | * notice, this list of conditions and the following disclaimer in the |
46 | * documentation and/or other materials provided with the distribution. |
47 | * 3. All advertising materials mentioning features or use of this software |
48 | * must display the following acknowledgement: |
49 | * This product includes software developed by the NetBSD |
50 | * Foundation, Inc. and its contributors. |
51 | * 4. Neither the name of The NetBSD Foundation nor the names of its |
52 | * contributors may be used to endorse or promote products derived |
53 | * from this software without specific prior written permission. |
54 | * |
55 | * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY |
56 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
57 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
58 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE |
59 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
60 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
61 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
62 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
63 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
64 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
65 | * SUCH DAMAGE. |
66 | */ |
67 | |
68 | #include <sys/cdefs.h> |
69 | __KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.104 2016/07/07 06:55:40 msaitoh Exp $" ); |
70 | |
71 | #include "opt_ddb.h" |
72 | #include "opt_multiprocessor.h" |
73 | #include "opt_mpbios.h" /* for MPDEBUG */ |
74 | #include "opt_mtrr.h" |
75 | #include "opt_xen.h" |
76 | |
77 | #include "lapic.h" |
78 | #include "ioapic.h" |
79 | |
80 | #include <sys/param.h> |
81 | #include <sys/proc.h> |
82 | #include <sys/systm.h> |
83 | #include <sys/device.h> |
84 | #include <sys/kmem.h> |
85 | #include <sys/cpu.h> |
86 | #include <sys/cpufreq.h> |
87 | #include <sys/atomic.h> |
88 | #include <sys/reboot.h> |
89 | #include <sys/idle.h> |
90 | |
91 | #include <uvm/uvm.h> |
92 | |
93 | #include <machine/cpufunc.h> |
94 | #include <machine/cpuvar.h> |
95 | #include <machine/pmap.h> |
96 | #include <machine/vmparam.h> |
97 | #include <machine/mpbiosvar.h> |
98 | #include <machine/pcb.h> |
99 | #include <machine/specialreg.h> |
100 | #include <machine/segments.h> |
101 | #include <machine/gdt.h> |
102 | #include <machine/mtrr.h> |
103 | #include <machine/pio.h> |
104 | |
105 | #include <x86/fpu.h> |
106 | |
107 | #include <xen/xen.h> |
108 | #include <xen/xen-public/vcpu.h> |
109 | #include <xen/vcpuvar.h> |
110 | |
111 | #if NLAPIC > 0 |
112 | #include <machine/apicvar.h> |
113 | #include <machine/i82489reg.h> |
114 | #include <machine/i82489var.h> |
115 | #endif |
116 | |
117 | #include <dev/ic/mc146818reg.h> |
118 | #include <dev/isa/isareg.h> |
119 | |
/*
 * autoconf(9) glue: "cpu" attaches the physical (ACPI/MP-table) CPU
 * devices, "vcpu" attaches the Xen virtual CPUs that actually run code.
 */
static int cpu_match(device_t, cfdata_t, void *);
static void cpu_attach(device_t, device_t, void *);
static void cpu_defer(device_t);
static int cpu_rescan(device_t, const char *, const int *);
static void cpu_childdetached(device_t, device_t);
static int vcpu_match(device_t, cfdata_t, void *);
static void vcpu_attach(device_t, device_t, void *);
static void cpu_attach_common(device_t, device_t, void *);
void cpu_offline_md(void);

/* Per-device softc shared by both the cpu and vcpu attachments. */
struct cpu_softc {
	device_t sc_dev; /* device tree glue */
	struct cpu_info *sc_info; /* pointer to CPU info */
	bool sc_wasonline; /* CPU was online before suspend */
};

int mp_cpu_start(struct cpu_info *, vaddr_t);
void mp_cpu_start_cleanup(struct cpu_info *);
/* MD start/cleanup hooks handed to MI code via cpu_attach_args. */
const struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL,
    mp_cpu_start_cleanup };

/* cpu supports rescan/childdetached for cpufeaturebus children. */
CFATTACH_DECL2_NEW(cpu, sizeof(struct cpu_softc),
    cpu_match, cpu_attach, NULL, NULL, cpu_rescan, cpu_childdetached);

CFATTACH_DECL_NEW(vcpu, sizeof(struct cpu_softc),
    vcpu_match, vcpu_attach, NULL, NULL);
146 | |
147 | /* |
148 | * Statically-allocated CPU info for the primary CPU (or the only |
149 | * CPU, on uniprocessors). The CPU info list is initialized to |
150 | * point at it. |
151 | */ |
152 | #ifdef TRAPLOG |
153 | #include <machine/tlog.h> |
154 | struct tlog tlog_primary; |
155 | #endif |
156 | struct cpu_info cpu_info_primary __aligned(CACHE_LINE_SIZE) = { |
157 | .ci_dev = 0, |
158 | .ci_self = &cpu_info_primary, |
159 | .ci_idepth = -1, |
160 | .ci_curlwp = &lwp0, |
161 | .ci_curldt = -1, |
162 | #ifdef TRAPLOG |
163 | .ci_tlog = &tlog_primary, |
164 | #endif |
165 | |
166 | }; |
167 | struct cpu_info phycpu_info_primary __aligned(CACHE_LINE_SIZE) = { |
168 | .ci_dev = 0, |
169 | .ci_self = &phycpu_info_primary, |
170 | }; |
171 | |
172 | struct cpu_info *cpu_info_list = &cpu_info_primary; |
173 | struct cpu_info *phycpu_info_list = &phycpu_info_primary; |
174 | |
175 | uint32_t cpu_feature[7]; /* X86 CPUID feature bits |
176 | * [0] basic features %edx |
177 | * [1] basic features %ecx |
178 | * [2] extended features %edx |
179 | * [3] extended features %ecx |
180 | * [4] VIA padlock features |
181 | * [5] structured extended features cpuid.7:%ebx |
182 | * [6] structured extended features cpuid.7:%ecx |
183 | */ |
184 | |
185 | bool x86_mp_online; |
186 | paddr_t mp_trampoline_paddr = MP_TRAMPOLINE; |
187 | |
188 | #if defined(MULTIPROCESSOR) |
189 | void cpu_hatch(void *); |
190 | static void cpu_boot_secondary(struct cpu_info *ci); |
191 | static void cpu_start_secondary(struct cpu_info *ci); |
192 | #endif /* MULTIPROCESSOR */ |
193 | |
/*
 * Match hook for the physical "cpu" device: every candidate matches.
 */
static int
cpu_match(device_t parent, cfdata_t match, void *aux)
{

	return 1;
}
200 | |
/*
 * Attach a physical CPU device. Allocates (or reuses, for the first
 * CPU) a cpu_info, links it onto phycpu_info_list, and defers the
 * cpufeaturebus child scan until interrupts are configured.
 */
static void
cpu_attach(device_t parent, device_t self, void *aux)
{
	struct cpu_softc *sc = device_private(self);
	struct cpu_attach_args *caa = aux;
	struct cpu_info *ci;
	uintptr_t ptr;
	static int nphycpu = 0;	/* number of physical CPUs attached so far */

	sc->sc_dev = self;

	/*
	 * If we're an Application Processor, allocate a cpu_info
	 * If we're the first attached CPU use the primary cpu_info,
	 * otherwise allocate a new one
	 */
	aprint_naive("\n" );
	aprint_normal("\n" );
	if (nphycpu > 0) {
		struct cpu_info *tmp;
		/* over-allocate so ci can be rounded to a cache line */
		ptr = (uintptr_t)kmem_zalloc(sizeof(*ci) + CACHE_LINE_SIZE - 1,
		    KM_SLEEP);
		ci = (struct cpu_info *)roundup2(ptr, CACHE_LINE_SIZE);
		ci->ci_curldt = -1;

		/* append to the tail of phycpu_info_list */
		tmp = phycpu_info_list;
		while (tmp->ci_next)
			tmp = tmp->ci_next;

		tmp->ci_next = ci;
	} else {
		ci = &phycpu_info_primary;
	}

	ci->ci_self = ci;
	sc->sc_info = ci;

	ci->ci_dev = self;
	ci->ci_acpiid = caa->cpu_id;
	ci->ci_cpuid = caa->cpu_number;
	ci->ci_vcpu = NULL;		/* physical CPU: no Xen vcpu_info */
	ci->ci_index = nphycpu++;

	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n" );

	/* Scan for cpufeaturebus children once autoconfiguration settles. */
	(void)config_defer(self, cpu_defer);
}
249 | |
/*
 * Deferred-attach hook: perform the initial cpufeaturebus scan
 * after the rest of autoconfiguration has completed.
 */
static void
cpu_defer(device_t self)
{
	cpu_rescan(self, NULL, NULL);
}
255 | |
256 | static int |
257 | cpu_rescan(device_t self, const char *ifattr, const int *locators) |
258 | { |
259 | struct cpu_softc *sc = device_private(self); |
260 | struct cpufeature_attach_args cfaa; |
261 | struct cpu_info *ci = sc->sc_info; |
262 | |
263 | memset(&cfaa, 0, sizeof(cfaa)); |
264 | cfaa.ci = ci; |
265 | |
266 | if (ifattr_match(ifattr, "cpufeaturebus" )) { |
267 | |
268 | if (ci->ci_frequency == NULL) { |
269 | cfaa.name = "frequency" ; |
270 | ci->ci_frequency = config_found_ia(self, |
271 | "cpufeaturebus" , &cfaa, NULL); |
272 | } |
273 | } |
274 | |
275 | return 0; |
276 | } |
277 | |
278 | static void |
279 | cpu_childdetached(device_t self, device_t child) |
280 | { |
281 | struct cpu_softc *sc = device_private(self); |
282 | struct cpu_info *ci = sc->sc_info; |
283 | |
284 | if (ci->ci_frequency == child) |
285 | ci->ci_frequency = NULL; |
286 | } |
287 | |
288 | static int |
289 | vcpu_match(device_t parent, cfdata_t match, void *aux) |
290 | { |
291 | struct vcpu_attach_args *vcaa = aux; |
292 | struct vcpu_runstate_info vcr; |
293 | int error; |
294 | |
295 | if (strcmp(vcaa->vcaa_name, match->cf_name) == 0) { |
296 | error = HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info, |
297 | vcaa->vcaa_caa.cpu_number, |
298 | &vcr); |
299 | switch (error) { |
300 | case 0: |
301 | return 1; |
302 | case -ENOENT: |
303 | return 0; |
304 | default: |
305 | panic("Unknown hypervisor error %d returned on vcpu runstate probe\n" , error); |
306 | } |
307 | } |
308 | |
309 | return 0; |
310 | } |
311 | |
/*
 * Attach a Xen virtual CPU. Installs the MD start/cleanup hooks
 * (mp_cpu_funcs) and hands off to cpu_attach_common() for the bulk
 * of the per-CPU initialization.
 */
static void
vcpu_attach(device_t parent, device_t self, void *aux)
{
	struct vcpu_attach_args *vcaa = aux;

	/* cpu_func must not have been set by anyone else. */
	KASSERT(vcaa->vcaa_caa.cpu_func == NULL);
	vcaa->vcaa_caa.cpu_func = &mp_cpu_funcs;
	cpu_attach_common(parent, self, &vcaa->vcaa_caa);

	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n" );
}
324 | |
/*
 * Ask the hypervisor whether the given vcpu is currently up.
 * Returns non-zero if up, 0 if down (VCPUOP_is_up semantics).
 */
static int
vcpu_is_up(struct cpu_info *ci)
{
	KASSERT(ci != NULL);
	return HYPERVISOR_vcpu_op(VCPUOP_is_up, ci->ci_cpuid, NULL);
}
331 | |
332 | static void |
333 | cpu_vm_init(struct cpu_info *ci) |
334 | { |
335 | int ncolors = 2, i; |
336 | |
337 | for (i = CAI_ICACHE; i <= CAI_L2CACHE; i++) { |
338 | struct x86_cache_info *cai; |
339 | int tcolors; |
340 | |
341 | cai = &ci->ci_cinfo[i]; |
342 | |
343 | tcolors = atop(cai->cai_totalsize); |
344 | switch(cai->cai_associativity) { |
345 | case 0xff: |
346 | tcolors = 1; /* fully associative */ |
347 | break; |
348 | case 0: |
349 | case 1: |
350 | break; |
351 | default: |
352 | tcolors /= cai->cai_associativity; |
353 | } |
354 | ncolors = max(ncolors, tcolors); |
355 | } |
356 | |
357 | /* |
358 | * Knowing the size of the largest cache on this CPU, potentially |
359 | * re-color our pages. |
360 | */ |
361 | aprint_debug_dev(ci->ci_dev, "%d page colors\n" , ncolors); |
362 | uvm_page_recolor(ncolors); |
363 | pmap_tlb_cpu_init(ci); |
364 | } |
365 | |
366 | static void |
367 | cpu_attach_common(device_t parent, device_t self, void *aux) |
368 | { |
369 | struct cpu_softc *sc = device_private(self); |
370 | struct cpu_attach_args *caa = aux; |
371 | struct cpu_info *ci; |
372 | uintptr_t ptr; |
373 | int cpunum = caa->cpu_number; |
374 | static bool again = false; |
375 | |
376 | sc->sc_dev = self; |
377 | |
378 | /* |
379 | * If we're an Application Processor, allocate a cpu_info |
380 | * structure, otherwise use the primary's. |
381 | */ |
382 | if (caa->cpu_role == CPU_ROLE_AP) { |
383 | aprint_naive(": Application Processor\n" ); |
384 | ptr = (uintptr_t)kmem_alloc(sizeof(*ci) + CACHE_LINE_SIZE - 1, |
385 | KM_SLEEP); |
386 | ci = (struct cpu_info *)roundup2(ptr, CACHE_LINE_SIZE); |
387 | memset(ci, 0, sizeof(*ci)); |
388 | #ifdef TRAPLOG |
389 | ci->ci_tlog_base = kmem_zalloc(sizeof(struct tlog), KM_SLEEP); |
390 | #endif |
391 | } else { |
392 | aprint_naive(": %s Processor\n" , |
393 | caa->cpu_role == CPU_ROLE_SP ? "Single" : "Boot" ); |
394 | ci = &cpu_info_primary; |
395 | } |
396 | |
397 | ci->ci_self = ci; |
398 | sc->sc_info = ci; |
399 | ci->ci_dev = self; |
400 | ci->ci_cpuid = cpunum; |
401 | |
402 | KASSERT(HYPERVISOR_shared_info != NULL); |
403 | KASSERT(cpunum < XEN_LEGACY_MAX_VCPUS); |
404 | ci->ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[cpunum]; |
405 | |
406 | KASSERT(ci->ci_func == 0); |
407 | ci->ci_func = caa->cpu_func; |
408 | aprint_normal("\n" ); |
409 | |
410 | /* Must be called before mi_cpu_attach(). */ |
411 | cpu_vm_init(ci); |
412 | |
413 | if (caa->cpu_role == CPU_ROLE_AP) { |
414 | int error; |
415 | |
416 | error = mi_cpu_attach(ci); |
417 | |
418 | KASSERT(ci->ci_data.cpu_idlelwp != NULL); |
419 | if (error != 0) { |
420 | aprint_error_dev(self, |
421 | "mi_cpu_attach failed with %d\n" , error); |
422 | return; |
423 | } |
424 | |
425 | } else { |
426 | KASSERT(ci->ci_data.cpu_idlelwp != NULL); |
427 | } |
428 | |
429 | KASSERT(ci->ci_cpuid == ci->ci_index); |
430 | #ifdef __x86_64__ |
431 | /* No user PGD mapped for this CPU yet */ |
432 | ci->ci_xen_current_user_pgd = 0; |
433 | #endif |
434 | #if defined(__x86_64__) || defined(PAE) |
435 | mutex_init(&ci->ci_kpm_mtx, MUTEX_DEFAULT, IPL_VM); |
436 | #endif |
437 | pmap_reference(pmap_kernel()); |
438 | ci->ci_pmap = pmap_kernel(); |
439 | ci->ci_tlbstate = TLBSTATE_STALE; |
440 | |
441 | /* |
442 | * Boot processor may not be attached first, but the below |
443 | * must be done to allow booting other processors. |
444 | */ |
445 | if (!again) { |
446 | atomic_or_32(&ci->ci_flags, CPUF_PRESENT | CPUF_PRIMARY); |
447 | /* Basic init. */ |
448 | cpu_intr_init(ci); |
449 | cpu_get_tsc_freq(ci); |
450 | cpu_init(ci); |
451 | pmap_cpu_init_late(ci); |
452 | |
453 | /* Every processor needs to init its own ipi h/w (similar to lapic) */ |
454 | xen_ipi_init(); |
455 | |
456 | /* Make sure DELAY() is initialized. */ |
457 | DELAY(1); |
458 | again = true; |
459 | } |
460 | |
461 | /* further PCB init done later. */ |
462 | |
463 | switch (caa->cpu_role) { |
464 | case CPU_ROLE_SP: |
465 | atomic_or_32(&ci->ci_flags, CPUF_SP); |
466 | cpu_identify(ci); |
467 | x86_cpu_idle_init(); |
468 | |
469 | break; |
470 | |
471 | case CPU_ROLE_BP: |
472 | atomic_or_32(&ci->ci_flags, CPUF_BSP); |
473 | cpu_identify(ci); |
474 | x86_cpu_idle_init(); |
475 | |
476 | break; |
477 | |
478 | case CPU_ROLE_AP: |
479 | atomic_or_32(&ci->ci_flags, CPUF_AP); |
480 | |
481 | /* |
482 | * report on an AP |
483 | */ |
484 | |
485 | #if defined(MULTIPROCESSOR) |
486 | /* interrupt handler stack */ |
487 | cpu_intr_init(ci); |
488 | |
489 | /* Setup per-cpu memory for gdt */ |
490 | gdt_alloc_cpu(ci); |
491 | |
492 | pmap_cpu_init_late(ci); |
493 | cpu_start_secondary(ci); |
494 | |
495 | if (ci->ci_flags & CPUF_PRESENT) { |
496 | struct cpu_info *tmp; |
497 | |
498 | cpu_identify(ci); |
499 | tmp = cpu_info_list; |
500 | while (tmp->ci_next) |
501 | tmp = tmp->ci_next; |
502 | |
503 | tmp->ci_next = ci; |
504 | } |
505 | #else |
506 | aprint_error_dev(ci->ci_dev, "not started\n" ); |
507 | #endif |
508 | break; |
509 | |
510 | default: |
511 | panic("unknown processor type??\n" ); |
512 | } |
513 | |
514 | #ifdef MPVERBOSE |
515 | if (mp_verbose) { |
516 | struct lwp *l = ci->ci_data.cpu_idlelwp; |
517 | struct pcb *pcb = lwp_getpcb(l); |
518 | |
519 | aprint_verbose_dev(self, |
520 | "idle lwp at %p, idle sp at 0x%p\n" , |
521 | l, |
522 | #ifdef i386 |
523 | (void *)pcb->pcb_esp |
524 | #else /* i386 */ |
525 | (void *)pcb->pcb_rsp |
526 | #endif /* i386 */ |
527 | ); |
528 | |
529 | } |
530 | #endif /* MPVERBOSE */ |
531 | } |
532 | |
533 | /* |
534 | * Initialize the processor appropriately. |
535 | */ |
536 | |
537 | void |
538 | cpu_init(struct cpu_info *ci) |
539 | { |
540 | |
541 | /* |
542 | * If we have FXSAVE/FXRESTOR, use them. |
543 | */ |
544 | if (cpu_feature[0] & CPUID_FXSR) { |
545 | lcr4(rcr4() | CR4_OSFXSR); |
546 | |
547 | /* |
548 | * If we have SSE/SSE2, enable XMM exceptions. |
549 | */ |
550 | if (cpu_feature[0] & (CPUID_SSE|CPUID_SSE2)) |
551 | lcr4(rcr4() | CR4_OSXMMEXCPT); |
552 | } |
553 | |
554 | atomic_or_32(&ci->ci_flags, CPUF_RUNNING); |
555 | } |
556 | |
557 | |
558 | #ifdef MULTIPROCESSOR |
559 | |
560 | void |
561 | cpu_boot_secondary_processors(void) |
562 | { |
563 | struct cpu_info *ci; |
564 | u_long i; |
565 | for (i = 0; i < maxcpus; i++) { |
566 | ci = cpu_lookup(i); |
567 | if (ci == NULL) |
568 | continue; |
569 | if (ci->ci_data.cpu_idlelwp == NULL) |
570 | continue; |
571 | if ((ci->ci_flags & CPUF_PRESENT) == 0) |
572 | continue; |
573 | if (ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY)) |
574 | continue; |
575 | cpu_boot_secondary(ci); |
576 | } |
577 | |
578 | x86_mp_online = true; |
579 | } |
580 | |
581 | static void |
582 | cpu_init_idle_lwp(struct cpu_info *ci) |
583 | { |
584 | struct lwp *l = ci->ci_data.cpu_idlelwp; |
585 | struct pcb *pcb = lwp_getpcb(l); |
586 | |
587 | pcb->pcb_cr0 = rcr0(); |
588 | } |
589 | |
590 | void |
591 | cpu_init_idle_lwps(void) |
592 | { |
593 | struct cpu_info *ci; |
594 | u_long i; |
595 | |
596 | for (i = 0; i < maxcpus; i++) { |
597 | ci = cpu_lookup(i); |
598 | if (ci == NULL) |
599 | continue; |
600 | if (ci->ci_data.cpu_idlelwp == NULL) |
601 | continue; |
602 | if ((ci->ci_flags & CPUF_PRESENT) == 0) |
603 | continue; |
604 | cpu_init_idle_lwp(ci); |
605 | } |
606 | } |
607 | |
/*
 * Start an application processor via the MD start hook
 * (mp_cpu_start() on Xen) and wait for it to announce itself by
 * setting CPUF_PRESENT in cpu_hatch().
 */
static void
cpu_start_secondary(struct cpu_info *ci)
{
	int i;

	aprint_debug_dev(ci->ci_dev, "starting\n" );

	/* The AP begins life running its idle lwp. */
	ci->ci_curlwp = ci->ci_data.cpu_idlelwp;

	if (CPU_STARTUP(ci, (vaddr_t) cpu_hatch) != 0) {
		return;
	}

	/*
	 * wait for it to become ready
	 */
	/* up to ~1s: 100000 iterations of 10us each */
	for (i = 100000; (!(ci->ci_flags & CPUF_PRESENT)) && i > 0; i--) {
		delay(10);
	}
	if ((ci->ci_flags & CPUF_PRESENT) == 0) {
		aprint_error_dev(ci->ci_dev, "failed to become ready\n" );
#if defined(MPDEBUG) && defined(DDB)
		printf("dropping into debugger; continue from here to resume boot\n" );
		Debugger();
#endif
	}

	CPU_START_CLEANUP(ci);
}
637 | |
638 | void |
639 | cpu_boot_secondary(struct cpu_info *ci) |
640 | { |
641 | int i; |
642 | atomic_or_32(&ci->ci_flags, CPUF_GO); |
643 | for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i > 0; i--) { |
644 | delay(10); |
645 | } |
646 | if ((ci->ci_flags & CPUF_RUNNING) == 0) { |
647 | aprint_error_dev(ci->ci_dev, "CPU failed to start\n" ); |
648 | #if defined(MPDEBUG) && defined(DDB) |
649 | printf("dropping into debugger; continue from here to resume boot\n" ); |
650 | Debugger(); |
651 | #endif |
652 | } |
653 | } |
654 | |
655 | /* |
656 | * APs end up here immediately after initialisation and VCPUOP_up in |
657 | * mp_cpu_start(). |
658 | * At this point, we are running in the idle pcb/idle stack of the new |
659 | * CPU. This function jumps to the idle loop and starts looking for |
660 | * work. |
661 | */ |
662 | extern void x86_64_tls_switch(struct lwp *); |
663 | void |
664 | cpu_hatch(void *v) |
665 | { |
666 | struct cpu_info *ci = (struct cpu_info *)v; |
667 | struct pcb *pcb; |
668 | int s, i; |
669 | |
670 | /* Setup TLS and kernel GS/FS */ |
671 | cpu_init_msrs(ci, true); |
672 | cpu_init_idt(); |
673 | gdt_init_cpu(ci); |
674 | |
675 | cpu_probe(ci); |
676 | |
677 | atomic_or_32(&ci->ci_flags, CPUF_PRESENT); |
678 | |
679 | while ((ci->ci_flags & CPUF_GO) == 0) { |
680 | /* Don't use delay, boot CPU may be patching the text. */ |
681 | for (i = 10000; i != 0; i--) |
682 | x86_pause(); |
683 | } |
684 | |
685 | /* Because the text may have been patched in x86_patch(). */ |
686 | x86_flush(); |
687 | tlbflushg(); |
688 | |
689 | KASSERT((ci->ci_flags & CPUF_RUNNING) == 0); |
690 | |
691 | pcb = lwp_getpcb(curlwp); |
692 | pcb->pcb_cr3 = pmap_pdirpa(pmap_kernel(), 0); |
693 | pcb = lwp_getpcb(ci->ci_data.cpu_idlelwp); |
694 | |
695 | xen_ipi_init(); |
696 | |
697 | xen_initclocks(); |
698 | |
699 | #ifdef __x86_64__ |
700 | fpuinit(ci); |
701 | #endif |
702 | |
703 | lldt(GSEL(GLDT_SEL, SEL_KPL)); |
704 | |
705 | cpu_init(ci); |
706 | cpu_get_tsc_freq(ci); |
707 | |
708 | s = splhigh(); |
709 | x86_enable_intr(); |
710 | splx(s); |
711 | |
712 | aprint_debug_dev(ci->ci_dev, "running\n" ); |
713 | |
714 | cpu_switchto(NULL, ci->ci_data.cpu_idlelwp, true); |
715 | |
716 | idle_loop(NULL); |
717 | KASSERT(false); |
718 | } |
719 | |
720 | #if defined(DDB) |
721 | |
722 | #include <ddb/db_output.h> |
723 | #include <machine/db_machdep.h> |
724 | |
725 | /* |
726 | * Dump CPU information from ddb. |
727 | */ |
728 | void |
729 | cpu_debug_dump(void) |
730 | { |
731 | struct cpu_info *ci; |
732 | CPU_INFO_ITERATOR cii; |
733 | |
734 | db_printf("addr dev id flags ipis curlwp fpcurlwp\n" ); |
735 | for (CPU_INFO_FOREACH(cii, ci)) { |
736 | db_printf("%p %s %ld %x %x %10p %10p\n" , |
737 | ci, |
738 | ci->ci_dev == NULL ? "BOOT" : device_xname(ci->ci_dev), |
739 | (long)ci->ci_cpuid, |
740 | ci->ci_flags, ci->ci_ipis, |
741 | ci->ci_curlwp, |
742 | ci->ci_fpcurlwp); |
743 | } |
744 | } |
745 | #endif /* DDB */ |
746 | |
747 | #endif /* MULTIPROCESSOR */ |
748 | |
749 | extern void hypervisor_callback(void); |
750 | extern void failsafe_callback(void); |
751 | #ifdef __x86_64__ |
752 | typedef void (vector)(void); |
753 | extern vector Xsyscall, Xsyscall32; |
754 | #endif |
755 | |
756 | /* |
757 | * Setup the "trampoline". On Xen, we setup nearly all cpu context |
758 | * outside a trampoline, so we prototype and call targetip like so: |
759 | * void targetip(struct cpu_info *); |
760 | */ |
761 | |
762 | static void |
763 | gdt_prepframes(paddr_t *frames, vaddr_t base, uint32_t entries) |
764 | { |
765 | int i; |
766 | for (i = 0; i < roundup(entries, PAGE_SIZE) >> PAGE_SHIFT; i++) { |
767 | |
768 | frames[i] = ((paddr_t) xpmap_ptetomach( |
769 | (pt_entry_t *) (base + (i << PAGE_SHIFT)))) |
770 | >> PAGE_SHIFT; |
771 | |
772 | /* Mark Read-only */ |
773 | pmap_pte_clearbits(kvtopte(base + (i << PAGE_SHIFT)), |
774 | PG_RW); |
775 | } |
776 | } |
777 | |
778 | #ifdef __x86_64__ |
779 | extern char *ldtstore; |
780 | |
/*
 * Build the amd64 vcpu_guest_context used to initialise a new Xen
 * vcpu so that it starts executing targetrip(ci) in kernel mode on
 * the idle lwp's stack. Passed to VCPUOP_initialise by
 * mp_cpu_start().
 */
static void
xen_init_amd64_vcpuctxt(struct cpu_info *ci,
    struct vcpu_guest_context *initctx,
    void targetrip(struct cpu_info *))
{
	/* page frames to point at GDT */
	extern int gdt_size;
	paddr_t frames[16];
	psize_t gdt_ents;

	struct lwp *l;
	struct pcb *pcb;

	volatile struct vcpu_info *vci;

	KASSERT(ci != NULL);
	KASSERT(ci != &cpu_info_primary);
	KASSERT(initctx != NULL);
	KASSERT(targetrip != NULL);

	memset(initctx, 0, sizeof *initctx);

	gdt_ents = roundup(gdt_size, PAGE_SIZE) >> PAGE_SHIFT;
	KASSERT(gdt_ents <= 16);	/* frames[] capacity */

	gdt_prepframes(frames, (vaddr_t) ci->ci_gdt, gdt_ents);

	/* Initialise the vcpu context: We use idle_loop()'s pcb context. */

	l = ci->ci_data.cpu_idlelwp;

	KASSERT(l != NULL);
	pcb = lwp_getpcb(l);
	KASSERT(pcb != NULL);

	/* resume with interrupts off */
	vci = ci->ci_vcpu;
	vci->evtchn_upcall_mask = 1;
	xen_mb();

	/* resume in kernel-mode */
	initctx->flags = VGCF_in_kernel | VGCF_online;

	/* Stack and entry points:
	 * We arrange for the stack frame for cpu_hatch() to
	 * appear as a callee frame of lwp_trampoline(). Being a
	 * leaf frame prevents trampling on any of the MD stack setup
	 * that x86/vm_machdep.c:cpu_lwp_fork() does for idle_loop()
	 */

	/* amd64 ABI: first argument in %rdi */
	initctx->user_regs.rdi = (uint64_t) ci;	/* targetrip(ci); */
	initctx->user_regs.rip = (vaddr_t) targetrip;

	initctx->user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);

	initctx->user_regs.rflags = pcb->pcb_flags;
	initctx->user_regs.rsp = pcb->pcb_rsp;

	/* Data segments */
	initctx->user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
	initctx->user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
	initctx->user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);

	/* GDT */
	memcpy(initctx->gdt_frames, frames, sizeof frames);
	initctx->gdt_ents = gdt_ents;

	/* LDT */
	initctx->ldt_base = (unsigned long) ldtstore;
	initctx->ldt_ents = LDT_SIZE >> 3;

	/* Kernel context state */
	initctx->kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
	initctx->kernel_sp = pcb->pcb_rsp0;
	initctx->ctrlreg[0] = pcb->pcb_cr0;
	initctx->ctrlreg[1] = 0; /* "resuming" from kernel - no User cr3. */
	/*
	 * NOTE(review): %cr2 (the fault address register) is seeded
	 * with the entry point here — presumably harmless scratch;
	 * confirm against the hypervisor interface.
	 */
	initctx->ctrlreg[2] = (vaddr_t) targetrip;
	/*
	 * Use pmap_kernel() L4 PD directly, until we setup the
	 * per-cpu L4 PD in pmap_cpu_init_late()
	 */
	initctx->ctrlreg[3] = xen_pfn_to_cr3(x86_btop(xpmap_ptom(ci->ci_kpm_pdirpa)));
	initctx->ctrlreg[4] = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT;


	/* Xen callbacks */
	initctx->event_callback_eip = (unsigned long) hypervisor_callback;
	initctx->failsafe_callback_eip = (unsigned long) failsafe_callback;
	initctx->syscall_callback_eip = (unsigned long) Xsyscall;

	return;
}
873 | #else /* i386 */ |
874 | extern union descriptor *ldt; |
875 | extern void Xsyscall(void); |
876 | |
/*
 * Build the i386 vcpu_guest_context used to initialise a new Xen
 * vcpu so that it starts executing targeteip(ci) in kernel mode on
 * the idle lwp's stack. i386 counterpart of
 * xen_init_amd64_vcpuctxt(); the argument is passed on the stack
 * rather than in a register.
 */
static void
xen_init_i386_vcpuctxt(struct cpu_info *ci,
    struct vcpu_guest_context *initctx,
    void targeteip(struct cpu_info *))
{
	/* page frames to point at GDT */
	extern int gdt_size;
	paddr_t frames[16];
	psize_t gdt_ents;

	struct lwp *l;
	struct pcb *pcb;

	volatile struct vcpu_info *vci;

	KASSERT(ci != NULL);
	KASSERT(ci != &cpu_info_primary);
	KASSERT(initctx != NULL);
	KASSERT(targeteip != NULL);

	memset(initctx, 0, sizeof *initctx);

	gdt_ents = roundup(gdt_size, PAGE_SIZE) >> PAGE_SHIFT;
	KASSERT(gdt_ents <= 16);	/* frames[] capacity */

	gdt_prepframes(frames, (vaddr_t) ci->ci_gdt, gdt_ents);

	/*
	 * Initialise the vcpu context:
	 * We use this cpu's idle_loop() pcb context.
	 */

	l = ci->ci_data.cpu_idlelwp;

	KASSERT(l != NULL);
	pcb = lwp_getpcb(l);
	KASSERT(pcb != NULL);

	/* resume with interrupts off */
	vci = ci->ci_vcpu;
	vci->evtchn_upcall_mask = 1;
	xen_mb();

	/* resume in kernel-mode */
	initctx->flags = VGCF_in_kernel | VGCF_online;

	/* Stack frame setup for cpu_hatch():
	 * We arrange for the stack frame for cpu_hatch() to
	 * appear as a callee frame of lwp_trampoline(). Being a
	 * leaf frame prevents trampling on any of the MD stack setup
	 * that x86/vm_machdep.c:cpu_lwp_fork() does for idle_loop()
	 */

	initctx->user_regs.esp = pcb->pcb_esp - 4; /* Leave word for
						      arg1 */
	{	/* targeteip(ci); */
		/*
		 * NOTE(review): arg[0] would be the return-address
		 * slot; arg[1] holds the argument — confirm this
		 * matches the i386 cdecl frame targeteip expects.
		 */
		uint32_t *arg = (uint32_t *) initctx->user_regs.esp;
		arg[1] = (uint32_t) ci;	/* arg1 */

	}

	initctx->user_regs.eip = (vaddr_t) targeteip;
	initctx->user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);
	initctx->user_regs.eflags |= pcb->pcb_iopl;

	/* Data segments */
	initctx->user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
	initctx->user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
	initctx->user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
	initctx->user_regs.fs = GSEL(GDATA_SEL, SEL_KPL);

	/* GDT */
	memcpy(initctx->gdt_frames, frames, sizeof frames);
	initctx->gdt_ents = gdt_ents;

	/* LDT */
	initctx->ldt_base = (unsigned long) ldt;
	initctx->ldt_ents = NLDT;

	/* Kernel context state */
	initctx->kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
	initctx->kernel_sp = pcb->pcb_esp0;
	initctx->ctrlreg[0] = pcb->pcb_cr0;
	initctx->ctrlreg[1] = 0; /* "resuming" from kernel - no User cr3. */
	/* NOTE(review): %cr2 seeded with entry point, as on amd64. */
	initctx->ctrlreg[2] = (vaddr_t) targeteip;
#ifdef PAE
	initctx->ctrlreg[3] = xen_pfn_to_cr3(x86_btop(xpmap_ptom(ci->ci_pae_l3_pdirpa)));
#else /* PAE */
	initctx->ctrlreg[3] = xen_pfn_to_cr3(x86_btop(xpmap_ptom(pcb->pcb_cr3)));
#endif /* PAE */
	initctx->ctrlreg[4] = /* CR4_PAE | */CR4_OSFXSR | CR4_OSXMMEXCPT;


	/* Xen callbacks */
	initctx->event_callback_eip = (unsigned long) hypervisor_callback;
	initctx->event_callback_cs = GSEL(GCODE_SEL, SEL_KPL);
	initctx->failsafe_callback_eip = (unsigned long) failsafe_callback;
	initctx->failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL);

	return;
}
978 | #endif /* __x86_64__ */ |
979 | |
/*
 * MD start hook for a secondary vcpu (installed via mp_cpu_funcs).
 * Builds a guest context that enters 'target' (cpu_hatch) with the
 * cpu_info as argument, then initialises and brings the vcpu up via
 * hypercalls. Returns 0 on success, a hypervisor errno or -1 on
 * failure.
 */
int
mp_cpu_start(struct cpu_info *ci, vaddr_t target)
{

	int hyperror;
	struct vcpu_guest_context vcpuctx;

	KASSERT(ci != NULL);
	KASSERT(ci != &cpu_info_primary);
	KASSERT(ci->ci_flags & CPUF_AP);

#ifdef __x86_64__
	xen_init_amd64_vcpuctxt(ci, &vcpuctx, (void (*)(struct cpu_info *))target);
#else /* i386 */
	xen_init_i386_vcpuctxt(ci, &vcpuctx, (void (*)(struct cpu_info *))target);
#endif /* __x86_64__ */

	/* Initialise the given vcpu to execute cpu_hatch(ci); */
	if ((hyperror = HYPERVISOR_vcpu_op(VCPUOP_initialise, ci->ci_cpuid, &vcpuctx))) {
		aprint_error(": context initialisation failed. errno = %d\n" , hyperror);
		return hyperror;
	}

	/* Start it up */

	/* First bring it down */
	if ((hyperror = HYPERVISOR_vcpu_op(VCPUOP_down, ci->ci_cpuid, NULL))) {
		aprint_error(": VCPUOP_down hypervisor command failed. errno = %d\n" , hyperror);
		return hyperror;
	}

	if ((hyperror = HYPERVISOR_vcpu_op(VCPUOP_up, ci->ci_cpuid, NULL))) {
		aprint_error(": VCPUOP_up hypervisor command failed. errno = %d\n" , hyperror);
		return hyperror;
	}

	/* Double-check with the hypervisor that the vcpu really is up. */
	if (!vcpu_is_up(ci)) {
		aprint_error(": did not come up\n" );
		return -1;
	}

	return 0;
}
1023 | |
1024 | void |
1025 | mp_cpu_start_cleanup(struct cpu_info *ci) |
1026 | { |
1027 | if (vcpu_is_up(ci)) { |
1028 | aprint_debug_dev(ci->ci_dev, "is started.\n" ); |
1029 | } |
1030 | else { |
1031 | aprint_error_dev(ci->ci_dev, "did not start up.\n" ); |
1032 | } |
1033 | |
1034 | } |
1035 | |
/*
 * cpu_init_msrs: set up per-CPU MSR-related state.
 *
 * On amd64, when 'full' is true, program the segment bases through the
 * hypervisor: FS base and user GS base are cleared, and the kernel GS
 * base is pointed at this CPU's cpu_info so %gs-relative per-CPU
 * accesses work.  Finally, enable the NX (no-execute) page-protection
 * bit in EFER if the CPU advertises it.
 */
void
cpu_init_msrs(struct cpu_info *ci, bool full)
{
#ifdef __x86_64__
	if (full) {
		/* Segment bases are set via hypercall, not wrmsr, on Xen PV. */
		HYPERVISOR_set_segment_base (SEGBASE_FS, 0);
		HYPERVISOR_set_segment_base (SEGBASE_GS_KERNEL, (uint64_t) ci);
		HYPERVISOR_set_segment_base (SEGBASE_GS_USER, 0);
	}
#endif /* __x86_64__ */

	/* Turn on no-execute support if the CPUID feature bit is present. */
	if (cpu_feature[2] & CPUID_NOX)
		wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE);

}
1051 | |
1052 | void |
1053 | cpu_offline_md(void) |
1054 | { |
1055 | int s; |
1056 | |
1057 | s = splhigh(); |
1058 | fpusave_cpu(true); |
1059 | splx(s); |
1060 | } |
1061 | |
/*
 * cpu_get_tsc_freq: derive this CPU's TSC frequency from the
 * Xen-provided per-vcpu time info and record it in
 * ci->ci_data.cpu_cc_freq.
 *
 * Xen publishes tsc_to_system_mul/tsc_shift such that scaling a TSC
 * delta by tsc_to_system_mul * 2^(tsc_shift - 32) yields nanoseconds,
 * so the frequency in Hz is (10^9 << 32) / tsc_to_system_mul, shifted
 * back by tsc_shift.
 *
 * NOTE(review): this waits for one version bump and then reads the
 * fields directly, without the usual even-version seqlock re-check of
 * the Xen time info — presumably a torn read is possible if the
 * hypervisor updates concurrently; verify against the Xen interface
 * headers.
 */
void
cpu_get_tsc_freq(struct cpu_info *ci)
{
	uint32_t vcpu_tversion;
	const volatile vcpu_time_info_t *tinfo = &ci->ci_vcpu->time;

	/* Busy-wait for the hypervisor to publish a fresh time sample. */
	vcpu_tversion = tinfo->version;
	while (tinfo->version == vcpu_tversion); /* Wait for a time update. XXX: timeout ? */

	/* freq = (10^9 << 32) / mul, then undo the hypervisor's shift. */
	uint64_t freq = 1000000000ULL << 32;
	freq = freq / (uint64_t)tinfo->tsc_to_system_mul;
	if ( tinfo->tsc_shift < 0 )
		freq = freq << -tinfo->tsc_shift;
	else
		freq = freq >> tinfo->tsc_shift;
	ci->ci_data.cpu_cc_freq = freq;
}
1079 | |
1080 | void |
1081 | x86_cpu_idle_xen(void) |
1082 | { |
1083 | struct cpu_info *ci = curcpu(); |
1084 | |
1085 | KASSERT(ci->ci_ilevel == IPL_NONE); |
1086 | |
1087 | x86_disable_intr(); |
1088 | if (!__predict_false(ci->ci_want_resched)) { |
1089 | idle_block(); |
1090 | } else { |
1091 | x86_enable_intr(); |
1092 | } |
1093 | } |
1094 | |
1095 | /* |
1096 | * Loads pmap for the current CPU. |
1097 | */ |
1098 | void |
1099 | cpu_load_pmap(struct pmap *pmap, struct pmap *oldpmap) |
1100 | { |
1101 | KASSERT(pmap != pmap_kernel()); |
1102 | |
1103 | #if defined(__x86_64__) || defined(PAE) |
1104 | struct cpu_info *ci = curcpu(); |
1105 | cpuid_t cid = cpu_index(ci); |
1106 | |
1107 | mutex_enter(&ci->ci_kpm_mtx); |
1108 | /* make new pmap visible to xen_kpm_sync() */ |
1109 | kcpuset_atomic_set(pmap->pm_xen_ptp_cpus, cid); |
1110 | #endif |
1111 | #ifdef i386 |
1112 | #ifdef PAE |
1113 | { |
1114 | int i; |
1115 | paddr_t l3_pd = xpmap_ptom_masked(ci->ci_pae_l3_pdirpa); |
1116 | /* don't update the kernel L3 slot */ |
1117 | for (i = 0 ; i < PDP_SIZE - 1; i++) { |
1118 | xpq_queue_pte_update(l3_pd + i * sizeof(pd_entry_t), |
1119 | xpmap_ptom(pmap->pm_pdirpa[i]) | PG_V); |
1120 | } |
1121 | tlbflush(); |
1122 | } |
1123 | #else /* PAE */ |
1124 | lcr3(pmap_pdirpa(pmap, 0)); |
1125 | #endif /* PAE */ |
1126 | #endif /* i386 */ |
1127 | |
1128 | #ifdef __x86_64__ |
1129 | { |
1130 | int i; |
1131 | pd_entry_t *new_pgd; |
1132 | paddr_t l4_pd_ma; |
1133 | |
1134 | l4_pd_ma = xpmap_ptom_masked(ci->ci_kpm_pdirpa); |
1135 | |
1136 | /* |
1137 | * Map user space address in kernel space and load |
1138 | * user cr3 |
1139 | */ |
1140 | new_pgd = pmap->pm_pdir; |
1141 | KASSERT(pmap == ci->ci_pmap); |
1142 | |
1143 | /* Copy user pmap L4 PDEs (in user addr. range) to per-cpu L4 */ |
1144 | for (i = 0; i < PDIR_SLOT_PTE; i++) { |
1145 | KASSERT(pmap != pmap_kernel() || new_pgd[i] == 0); |
1146 | if (ci->ci_kpm_pdir[i] != new_pgd[i]) { |
1147 | xpq_queue_pte_update( |
1148 | l4_pd_ma + i * sizeof(pd_entry_t), |
1149 | new_pgd[i]); |
1150 | } |
1151 | } |
1152 | |
1153 | xen_set_user_pgd(pmap_pdirpa(pmap, 0)); |
1154 | ci->ci_xen_current_user_pgd = pmap_pdirpa(pmap, 0); |
1155 | |
1156 | tlbflush(); |
1157 | } |
1158 | |
1159 | #endif /* __x86_64__ */ |
1160 | #if defined(__x86_64__) || defined(PAE) |
1161 | /* old pmap no longer visible to xen_kpm_sync() */ |
1162 | if (oldpmap != pmap_kernel()) { |
1163 | kcpuset_atomic_clear(oldpmap->pm_xen_ptp_cpus, cid); |
1164 | } |
1165 | mutex_exit(&ci->ci_kpm_mtx); |
1166 | #endif |
1167 | } |
1168 | |
1169 | /* |
1170 | * pmap_cpu_init_late: perform late per-CPU initialization. |
1171 | * Short note about percpu PDIR pages: |
1172 | * Both the PAE and __x86_64__ architectures have per-cpu PDIR |
1173 | * tables. This is to get around Xen's pagetable setup constraints for |
1174 | * PAE (multiple L3[3]s cannot point to the same L2 - Xen |
1175 | * will refuse to pin a table setup this way.) and for multiple cpus |
1176 | * to map in different user pmaps on __x86_64__ (see: cpu_load_pmap()) |
1177 | * |
1178 | * What this means for us is that the PDIR of the pmap_kernel() is |
1179 | * considered to be a canonical "SHADOW" PDIR with the following |
1180 | * properties: |
1181 | * - Its recursive mapping points to itself |
1182 | * - per-cpu recursive mappings point to themselves on __x86_64__ |
1183 | * - per-cpu L4 pages' kernel entries are expected to be in sync with |
1184 | * the shadow |
1185 | */ |
1186 | |
void
pmap_cpu_init_late(struct cpu_info *ci)
{
#if defined(PAE) || defined(__x86_64__)
	/*
	 * The BP has already its own PD page allocated during early
	 * MD startup.
	 */

#if defined(__x86_64__)
	/* Setup per-cpu normal_pdes */
	int i;
	extern pd_entry_t * const normal_pdes[];
	for (i = 0;i < PTP_LEVELS - 1;i++) {
		ci->ci_normal_pdes[i] = normal_pdes[i];
	}
#endif /* __x86_64__ */

	/* Everything below allocates/pins per-AP tables; the BP is done. */
	if (ci == &cpu_info_primary)
		return;

	KASSERT(ci != NULL);

#if defined(PAE)
	/* Per-cpu L3 page: required because Xen refuses to pin multiple
	 * L3s whose slot 3 points at the same L2 (see comment above). */
	cpu_alloc_l3_page(ci);
	KASSERT(ci->ci_pae_l3_pdirpa != 0);

	/* Initialise L2 entries 0 - 2: Point them to pmap_kernel() */
	int i;
	for (i = 0 ; i < PDP_SIZE - 1; i++) {
		ci->ci_pae_l3_pdir[i] =
		    xpmap_ptom_masked(pmap_kernel()->pm_pdirpa[i]) | PG_V;
	}
#endif /* PAE */

	/* Per-cpu kernel page directory (L4 on amd64, kernel L2 on PAE);
	 * allocated zeroed and wired. */
	ci->ci_kpm_pdir = (pd_entry_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_WIRED | UVM_KMF_ZERO | UVM_KMF_NOWAIT);

	if (ci->ci_kpm_pdir == NULL) {
		panic("%s: failed to allocate L4 per-cpu PD for CPU %d\n",
		    __func__, cpu_index(ci));
	}
	ci->ci_kpm_pdirpa = vtophys((vaddr_t) ci->ci_kpm_pdir);
	KASSERT(ci->ci_kpm_pdirpa != 0);

#if defined(__x86_64__)
	/*
	 * Copy over the pmap_kernel() shadow L4 entries
	 */

	memcpy(ci->ci_kpm_pdir, pmap_kernel()->pm_pdir, PAGE_SIZE);

	/* Recursive kernel mapping */
	ci->ci_kpm_pdir[PDIR_SLOT_PTE] = xpmap_ptom_masked(ci->ci_kpm_pdirpa) | PG_k | PG_V;
#elif defined(PAE)
	/* Copy over the pmap_kernel() shadow L2 entries that map the kernel */
	memcpy(ci->ci_kpm_pdir, pmap_kernel()->pm_pdir + PDIR_SLOT_KERN, nkptp[PTP_LEVELS - 1] * sizeof(pd_entry_t));
#endif /* __x86_64__ else PAE */

	/* Xen wants R/O: a page may not be mapped writable anywhere once
	 * it becomes (part of) a pinned page table. */
	pmap_protect(pmap_kernel(), (vaddr_t)ci->ci_kpm_pdir,
	    (vaddr_t)ci->ci_kpm_pdir + PAGE_SIZE, VM_PROT_READ);
	pmap_update(pmap_kernel());
#if defined(PAE)
	/* Initialise L3 entry 3. This mapping is shared across all
	 * pmaps and is static, ie; loading a new pmap will not update
	 * this entry.
	 */

	ci->ci_pae_l3_pdir[3] = xpmap_ptom_masked(ci->ci_kpm_pdirpa) | PG_k | PG_V;

	/* Mark L3 R/O (Xen wants this) */
	pmap_protect(pmap_kernel(), (vaddr_t)ci->ci_pae_l3_pdir,
	    (vaddr_t)ci->ci_pae_l3_pdir + PAGE_SIZE, VM_PROT_READ);
	pmap_update(pmap_kernel());

	xpq_queue_pin_l3_table(xpmap_ptom_masked(ci->ci_pae_l3_pdirpa));

#elif defined(__x86_64__)
	xpq_queue_pin_l4_table(xpmap_ptom_masked(ci->ci_kpm_pdirpa));
#endif /* PAE , __x86_64__ */
#endif /* defined(PAE) || defined(__x86_64__) */
}
1270 | |
1271 | /* |
1272 | * Notify all other cpus to halt. |
1273 | */ |
1274 | |
1275 | void |
1276 | cpu_broadcast_halt(void) |
1277 | { |
1278 | xen_broadcast_ipi(XEN_IPI_HALT); |
1279 | } |
1280 | |
1281 | /* |
1282 | * Send a dummy ipi to a cpu. |
1283 | */ |
1284 | |
1285 | void |
1286 | cpu_kick(struct cpu_info *ci) |
1287 | { |
1288 | (void)xen_send_ipi(ci, XEN_IPI_KICK); |
1289 | } |
1290 | |