aboutsummaryrefslogtreecommitdiffstats
path: root/tmk_core/protocol/adb.c
blob: 5c6c99b4fcc225fd4b5293eeafe1459271608a9c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
/*
Copyright 2011 Jun WAKO <wakojun@gmail.com>
Copyright 2013 Shay Green <gblargg@gmail.com>

This software is licensed with a Modified BSD License.
All of this is supposed to be Free Software, Open Source, DFSG-free,
GPL-compatible, and OK to use in both free and proprietary applications.
Additions and corrections to this file are welcome.


Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright
  notice, this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in
  the documentation and/or other materials provided with the
  distribution.

* Neither the name of the copyright holders nor the names of
  contributors may be used to endorse or promote products derived
  from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/

#include <stdbool.h>
#include <util/delay.h>
#include <avr/io.h>
#include <avr/interrupt.h>
#include "adb.h"


// Data line accessors. Written as macros rather than functions because
// GCC doesn't inline functions normally.
//
// data_lo(): sets the data bit in ADB_DDR (on AVR this makes the pin an output).
//            adb_host_init() clears the same bit in ADB_PORT, so the pin then
//            drives the line low.
// data_hi(): clears the data bit in ADB_DDR, i.e. stops driving the line.
//            Presumably an external pull-up then takes the line high -- confirm
//            against the hardware.
// data_in(): non-zero when the data bit reads as set in ADB_PIN.
#define data_lo() (ADB_DDR |=  (1<<ADB_DATA_BIT))
#define data_hi() (ADB_DDR &= ~(1<<ADB_DATA_BIT))
#define data_in() (ADB_PIN &   (1<<ADB_DATA_BIT))

// Power-switch pin helpers; only declared when ADB_PSW_BIT is defined.
#ifdef ADB_PSW_BIT
static inline void psw_lo(void);
static inline void psw_hi(void);
static inline bool psw_in(void);
#endif

// File-local bit-banging helpers, defined further down in this file.
static inline void attention(void);
static inline void place_bit0(void);
static inline void place_bit1(void);
static inline void send_byte(uint8_t data);
static inline uint16_t wait_data_lo(uint16_t us);
static inline uint16_t wait_data_hi(uint16_t us);
static inline uint16_t adb_host_dev_recv(uint8_t device);


/*
 * Prepare the ADB pins for host operation.
 *
 * Clears the data bit in ADB_PORT and then releases the data line with
 * data_hi() (data bit of ADB_DDR cleared). When ADB_PSW_BIT is defined the
 * power-switch pin is additionally set up through psw_hi().
 */
void adb_host_init(void)
{
    // Port latch low first, so that data_lo() (DDR bit set) later drives a low level.
    ADB_PORT &= ~(1<<ADB_DATA_BIT);
    data_hi();
#ifdef ADB_PSW_BIT
    psw_hi();
#endif
}

#ifdef ADB_PSW_BIT
/*
 * Report the current level of the power-switch pin.
 *
 * Returns whatever psw_in() reads: true while the pin reads high, false while
 * it reads low.
 */
bool adb_host_psw(void)
{
    const bool level = psw_in();

    return level;
}
#endif

/*
 * Don't call the receive function repeatedly without a delay in between; otherwise some
 * weak keyboard controllers get overloaded and miss keystrokes. The recommended interval is 12ms.
 *
 * Thanks a lot, blargg!
 * <http://geekhack.org/index.php?topic=14290.msg1068919#msg1068919>
 * <http://geekhack.org/index.php?topic=14290.msg1070139#msg1070139>
 */

// ADB Bit Cells
//
// bit cell time: 70-130us
// low part of bit0: 60-70% of bit cell
// low part of bit1: 30-40% of bit cell
//
//    bit cell time         70us        130us
//    --------------------------------------------
//    low  part of bit0     42-49       78-91
//    high part of bit0     21-28       39-52
//    low  part of bit1     21-28       39-52
//    high part of bit1     42-49       78-91
//
//
// bit0:
//    70us bit cell:
//      ____________~~~~~~
//      42-49        21-28  
//
//    130us bit cell:
//      ____________~~~~~~
//      78-91        39-52  
//
// bit1:
//    70us bit cell:
//      ______~~~~~~~~~~~~
//      21-28        42-49
//
//    130us bit cell:
//      ______~~~~~~~~~~~~
//      39-52        78-91
//
// [from Apple IIgs Hardware Reference Second Edition]

// Device addresses, already shifted into the upper nibble of the command byte
// so they can be OR-ed directly with the command-type/register bits.
enum {
    ADDR_KEYB  = 0x20,  // address 2: keyboard
    ADDR_MOUSE = 0x30   // address 3: mouse
};

/*
 * Poll the keyboard (Talk register 0 at the keyboard address).
 *
 * Returns the value produced by adb_host_dev_recv() unchanged: the 16-bit
 * register contents, 0 when the device had nothing to send, or one of that
 * function's error codes.
 */
uint16_t adb_host_kbd_recv(void)
{
    const uint16_t reply = adb_host_dev_recv(ADDR_KEYB);

    return reply;
}

#ifdef ADB_MOUSE_ENABLE
/* Mouse initialisation hook; there is currently nothing to set up. */
void adb_mouse_init(void)
{
    /* intentionally empty */
}

/*
 * Poll the mouse (Talk register 0 at the mouse address).
 *
 * Returns the value produced by adb_host_dev_recv() unchanged: the 16-bit
 * register contents, 0 when the device had nothing to send, or one of that
 * function's error codes.
 */
uint16_t adb_host_mouse_recv(void)
{
    const uint16_t reply = adb_host_dev_recv(ADDR_MOUSE);

    return reply;
}
#endif

/*
 * Issue "Talk register 0" to a device and read back its 16-bit reply.
 *
 * device - device address in the upper nibble (ADDR_KEYB / ADDR_MOUSE); the
 *          Talk/register-0 bits (0x0C) are OR-ed in here.
 *
 * Returns:
 *   the 16 data bits received, first bit received in the most significant bit
 *   0            the device did not pull the line low within the 500-count
 *                wait after the command (nothing to send)
 *   (uint16_t)-30  the line did not go high within the 500-count wait after
 *                  the command stop bit
 *   (uint16_t)-20  the first received bit (start bit) was not a 1
 *   (uint16_t)-21  stop bit check failed
 *   (uint16_t)-n   timeout inside a bit cell; n is the number of bits
 *                  (including the start bit) that were still outstanding
 *
 * NOTE(review): for n == 1 the error value is 0xFFFF, which the protocol notes
 * at the end of this file list as the power key release scancode -- confirm
 * how callers tell the two apart.
 *
 * Interrupts are disabled with cli() for the whole transaction and are
 * unconditionally re-enabled with sei() on every exit path.
 */
static inline uint16_t adb_host_dev_recv(uint8_t device)
{
    uint16_t data = 0;
    cli();
    attention();
    send_byte(device|0x0C);     // Addr:Keyboard(0010)/Mouse(0011), Cmd:Talk(11), Register0(00)
    place_bit0();               // Stopbit(0)
    if (!wait_data_hi(500)) {    // Service Request(310us Adjustable Keyboard): just ignored
        sei();
        return -30;             // something wrong
    }
    if (!wait_data_lo(500)) {   // Tlt/Stop to Start(140-260us)
        sei();
        return 0;               // No data to send
    }
    
    uint8_t n = 17; // start bit + 16 data bits
    do {
        // Low phase: 'lo' is what is left of a 130-count budget when the line rises.
        uint8_t lo = (uint8_t) wait_data_hi(130);
        if (!lo)
            goto error;
        
        // High phase: counted against the remainder of the same budget.
        uint8_t hi = (uint8_t) wait_data_lo(lo);
        if (!hi)
            goto error;
        
        // Convert the remaining counts into elapsed counts for each phase.
        hi = lo - hi;
        lo = 130 - lo;
        
        // A bit is 1 when the low phase was shorter than the high phase.
        // The start bit is shifted in as well; it falls off the top of the
        // 16-bit value once the 16 data bits have followed it.
        data <<= 1;
        if (lo < hi) {
            data |= 1;
        }
        else if (n == 17) {
            // The very first bit is the start bit and has to be a 1.
            sei();
            return -20;
        }
    }
    while ( --n );

    // Stop bit can't be checked normally since it could have service request lengthening
    // and its high state never goes low.
    // Accepted when the line goes high within 351 counts and then stays high
    // for the following 91 counts.
    if (!wait_data_hi(351) || wait_data_lo(91)) {
        sei();
        return -21;
    }
    sei();
    return data;

error:
    sei();
    return -n;
}

/*
 * Send a Listen transaction: one command byte followed by two data bytes.
 *
 * cmd    - complete command byte (address, command type and register bits),
 *          transmitted as given
 * data_h - first data byte transmitted
 * data_l - second data byte transmitted
 *
 * Nothing is read back from the bus, so delivery is not confirmed.
 * Interrupts are disabled with cli() for the duration of the transfer and
 * unconditionally re-enabled with sei() afterwards.
 */
void adb_host_listen(uint8_t cmd, uint8_t data_h, uint8_t data_l)
{
    cli();
    attention();
    send_byte(cmd);
    place_bit0();               // Stopbit(0)
    _delay_us(200);             // Tlt/Stop to Start
    place_bit1();               // Startbit(1)
    send_byte(data_h); 
    send_byte(data_l);
    place_bit0();               // Stopbit(0);
    sei();
}

/*
 * Set the keyboard LEDs.
 *
 * led - LED bits (bit2: ScrollLock, bit1: CapsLock, bit0: NumLock); only the
 *       low three bits are transmitted.
 *
 * Sends Listen to register 2 of the keyboard: command byte 0x2A is
 * Addr:Keyboard(0010), Cmd:Listen(10), Register2(10). The upper data byte is
 * not used and is sent as 0.
 */
void adb_host_kbd_led(uint8_t led)
{
    const uint8_t listen_reg2 = 0x2A;
    const uint8_t led_bits = led & 0x07;

    adb_host_listen(listen_reg2, 0, led_bits);
}


#ifdef ADB_PSW_BIT
/*
 * Drive the power-switch pin low: set its ADB_DDR bit (output) and clear its
 * ADB_PORT bit.
 *
 * Declared with (void) so the definition matches the prototype at the top of
 * the file instead of using an old-style empty parameter list.
 */
static inline void psw_lo(void)
{
    ADB_DDR  |=  (1<<ADB_PSW_BIT);
    ADB_PORT &= ~(1<<ADB_PSW_BIT);
}
/*
 * Release the power-switch pin: set its ADB_PORT bit and clear its ADB_DDR
 * bit (on AVR: input with the internal pull-up enabled).
 *
 * Declared with (void) so the definition matches the prototype at the top of
 * the file instead of using an old-style empty parameter list.
 */
static inline void psw_hi(void)
{
    ADB_PORT |=  (1<<ADB_PSW_BIT);
    ADB_DDR  &= ~(1<<ADB_PSW_BIT);
}
/*
 * Read the power-switch pin.
 *
 * The pin is first (re)configured the same way psw_hi() leaves it -- ADB_PORT
 * bit set, ADB_DDR bit cleared -- and then sampled from ADB_PIN.
 *
 * Returns true when the pin reads high, false when it reads low.
 *
 * Declared with (void) so the definition matches the prototype at the top of
 * the file instead of using an old-style empty parameter list.
 */
static inline bool psw_in(void)
{
    ADB_PORT |=  (1<<ADB_PSW_BIT);
    ADB_DDR  &= ~(1<<ADB_PSW_BIT);
    return (ADB_PIN & (1<<ADB_PSW_BIT)) != 0;
}
#endif

/*
 * Emit the Attention signal followed by the start bit.
 *
 * The line is pulled low for 765us here; place_bit1() then keeps it low for
 * another 35us (800us low in total) before releasing it for 65us.
 */
static inline void attention(void)
{
    data_lo();
    _delay_us(800-35); // bit1 holds lo for 35 more
    place_bit1();
}

/*
 * Put a 0 bit on the data line: low for 65us, then released for 35us
 * (one 100us bit cell).
 */
static inline void place_bit0(void)
{
    data_lo();
    _delay_us(65);
    data_hi();
    _delay_us(35);
}

/*
 * Put a 1 bit on the data line: low for 35us, then released for 65us
 * (one 100us bit cell).
 */
static inline void place_bit1(void)
{
    data_lo();
    _delay_us(35);
    data_hi();
    _delay_us(65);
}

/*
 * Clock one byte out on the data line, most significant bit first.
 *
 * Each set bit is emitted with place_bit1(), each clear bit with place_bit0().
 */
static inline void send_byte(uint8_t data)
{
    uint8_t mask = 0x80;

    while (mask) {
        if (data & mask) {
            place_bit1();
        } else {
            place_bit0();
        }
        mask >>= 1;
    }
}

// These are carefully coded to take 6 cycles of overhead.
// inline asm approach became too convoluted
//
// Poll until the data line reads low or the count runs out.
//
// us: maximum number of polls. Each poll that does not see the line low is
//     followed by a delay of 1us minus the time of 6 CPU cycles, so with the
//     6 cycles of loop overhead one iteration takes about 1us.
//     Must be non-zero: with 0 the counter wraps around and the loop polls
//     65536 times before giving up.
//
// Returns the remaining count at the moment the line was seen low (non-zero),
// or 0 when the line never went low within the given count.
static inline uint16_t wait_data_lo(uint16_t us)
{
    do {
        if ( !data_in() )
            break;
        _delay_us(1 - (6 * 1000000.0 / F_CPU));
    }
    while ( --us );
    return us;
}

// Poll until the data line reads high or the count runs out.
//
// us: maximum number of polls. Each poll that does not see the line high is
//     followed by a delay of 1us minus the time of 6 CPU cycles, which is
//     meant to make one iteration take about 1us including loop overhead.
//     Must be non-zero: with 0 the counter wraps around and the loop polls
//     65536 times before giving up.
//
// Returns the remaining count at the moment the line was seen high (non-zero),
// or 0 when the line never went high within the given count.
static inline uint16_t wait_data_hi(uint16_t us)
{
    do {
        if ( data_in() )
            break;
        _delay_us(1 - (6 * 1000000.0 / F_CPU));
    }
    while ( --us );
    return us;
}


/*
ADB Protocol
============

Resources
---------
ADB - The Untold Story: Space Aliens Ate My Mouse
    http://developer.apple.com/legacy/mac/library/#technotes/hw/hw_01.html
ADB Manager
    http://developer.apple.com/legacy/mac/library/documentation/mac/pdf/Devices/ADB_Manager.pdf
    Service request(5-17)
Apple IIgs Hardware Reference Second Edition [Chapter6 p121]
    ftp://ftp.apple.asimov.net/pub/apple_II/documentation/Apple%20IIgs%20Hardware%20Reference.pdf
ADB Keycode
    http://72.0.193.250/Documentation/macppc/adbkeycodes/
    http://m0115.web.fc2.com/m0115.jpg
    [Inside Macintosh volume V, pages 191-192]
    http://www.opensource.apple.com/source/IOHIDFamily/IOHIDFamily-421.18.3/IOHIDFamily/Cosmo_USB2ADB.c
ADB Signaling
    http://kbdbabel.sourceforge.net/doc/kbd_signaling_pcxt_ps2_adb.pdf
ADB Overview & History
    http://en.wikipedia.org/wiki/Apple_Desktop_Bus
Microchip Application Note: ADB device(with code for PIC16C)
    http://www.microchip.com/stellent/idcplg?IdcService=SS_GET_PAGE&nodeId=1824&appnote=en011062
AVR ATtiny2313 ADB to PS/2 converter(Japanese)
    http://hp.vector.co.jp/authors/VA000177/html/KeyBoardA5DEA5CBA5A2II.html


Pinouts
-------
    ADB female socket from the front:
    __________
    |        | <--- top
    | 4o  o3 |
    |2o    o1|
    |   ==   |
    |________| <--- bottom
      |    |   <--- 4pins


    ADB female socket from bottom:

    ========== <--- front
    |        |
    |        |
    |2o    o1|
    |4o    o3|
    ---------- <--- back

    1: Data
    2: Power SW(low when press Power key)
    3: Vcc(5V)
    4: GND


Commands
--------
    ADB command is 1byte and consists of 4bit-address, 2bit-command
    type and 2bit-register. The commands are always sent by Host.

    Command format:
    7 6 5 4 3 2 1 0
    | | | |------------ address
            | |-------- command type
                | |---- register

    bits                commands
    ------------------------------------------------------
    - - - - 0 0 0 0     Send Request(reset all devices)
    A A A A 0 0 0 1     Flush(reset a device)
    - - - - 0 0 1 0     Reserved
    - - - - 0 0 1 1     Reserved
    - - - - 0 1 - -     Reserved
    A A A A 1 0 R R     Listen(write to a device)
    A A A A 1 1 R R     Talk(read from a device)

    The command to read keycodes from the keyboard is 0x2C, which
    consists of keyboard address 2 and Talk against register 0.

    Address:
    2:  keyboard
    3:  mice

    Registers:
    0: application(keyboard uses this to store its data.)
    1: application
    2: application(keyboard uses this for LEDs and state of modifiers)
    3: status and command


Communication
-------------
    This is the minimum information needed for keyboard communication.
    See "Resources" for detail.

    Signaling:

    ~~~~____________~~||||||||||||__~~~~~_~~|||||||||||||||__~~~~

        |800us     |  |7 Command 0|  |   |  |15-64  Data  0|Stopbit(0)
        +Attention |              |  |   +Startbit(1)
                   +Startbit(1)   |  +Tlt(140-260us)
                                  +stopbit(0)

    Bit cells:

    bit0: ______~~~
          65    :35us

    bit1: ___~~~~~~
          35 :65us

    bit0 low time: 60-70% of bit cell(42-91us)
    bit1 low time: 30-40% of bit cell(21-52us)
    bit cell time: 70-130us
    [from Apple IIgs Hardware Reference Second Edition]

    Criterion for bit0/1:
    After 55us if line is low/high then bit is 0/1.

    Attention & start bit:
    Host asserts low for 560-1040us then places start bit(1).

    Tlt(Stop to Start):
    Bus stays high for 140-260us then device places start bit(1).

    Global reset:
    Host asserts low for 2.8-5.2ms. All devices are forced to reset.

    Service request from device(Srq):
    Device can request to send at command(Global only?) stop bit.
    Requesting device keeps low for 140-260us at stop bit of command.


Keyboard Data(Register0)
    This 16bit data can contain two keycodes and two released flags.
    The first keycode is placed in the upper byte. When only one keycode is sent,
    the lower byte is 0xFF.
    Release flag is 1 when key is released.

    1514 . . . . . 8 7 6 . . . . . 0
     | | | | | | | | | +-+-+-+-+-+-+-   Keycode2
     | | | | | | | | +---------------   Released2(1 when the key is released)
     | +-+-+-+-+-+-+-----------------   Keycode1
     +-------------------------------   Released1(1 when the key is released)

    Keycodes:
    Scancode consists of 7bit keycode and 1bit release flag.
    Device can send two keycodes at once. If just one keycode is sent,
    keycode1 contains it and keycode2 is 0xFF.

    Power switch:
    You can read the state from PSW line(active low) however
    the switch has a special scancode 0x7F7F, so you can
    also read from Data line. It uses 0xFFFF for release scancode.

Keyboard LEDs & state of keys(Register2)
    This register holds the current state of three LEDs and nine keys.
    The state of LEDs can be changed by sending Listen command.
    
    1514 . . . . . . 7 6 5 . 3 2 1 0
     | | | | | | | | | | | | | | | +-   LED1(NumLock)
     | | | | | | | | | | | | | | +---   LED2(CapsLock)
     | | | | | | | | | | | | | +-----   LED3(ScrollLock)
     | | | | | | | | | | +-+-+-------   Reserved
     | | | | | | | | | +-------------   ScrollLock
     | | | | | | | | +---------------   NumLock
     | | | | | | | +-----------------   Apple/Command
     | | | | | | +-------------------   Option
     | | | | | +---------------------   Shift
     | | | | +-----------------------   Control
     | | | +-------------------------   Reset/Power
     | | +---------------------------   CapsLock
     | +-----------------------------   Delete
     +-------------------------------   Reserved

END_OF_ADB
*/
t_to_phys(trampoline_base); } /* * We are called very early to get the low memory for the * SMP bootup trampoline page. */ void __init smp_alloc_memory(void) { trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE); /* * Has to be in very low memory so we can execute * real-mode AP code. */ if (__pa(trampoline_base) >= 0x9F000) BUG(); /* * Make the SMP trampoline executable: */ trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1); } /* * The bootstrap kernel entry code has set these up. Save them for * a given CPU */ static void __init smp_store_cpu_info(int id) { struct cpuinfo_x86 *c = cpu_data + id; *c = boot_cpu_data; if (id!=0) identify_cpu(c); /* * Mask B, Pentium, but not Pentium MMX */ if (c->x86_vendor == X86_VENDOR_INTEL && c->x86 == 5 && c->x86_mask >= 1 && c->x86_mask <= 4 && c->x86_model <= 3) /* * Remember we have B step Pentia with bugs */ smp_b_stepping = 1; /* * Certain Athlons might work (for various values of 'work') in SMP * but they are not certified as MP capable. */ if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { /* Athlon 660/661 is valid. */ if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1))) goto valid_k7; /* Duron 670 is valid */ if ((c->x86_model==7) && (c->x86_mask==0)) goto valid_k7; /* * Athlon 662, Duron 671, and Athlon >model 7 have capability bit. * It's worth noting that the A5 stepping (662) of some Athlon XP's * have the MP bit set. * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more. */ if (((c->x86_model==6) && (c->x86_mask>=2)) || ((c->x86_model==7) && (c->x86_mask>=1)) || (c->x86_model> 7)) if (cpu_has_mp) goto valid_k7; /* If we get here, it's not a certified SMP capable AMD system. */ tainted |= TAINT_UNSAFE_SMP; } valid_k7: ; } /* * TSC synchronization. * * We first check whether all CPUs have their TSC's synchronized, * then we print a warning if not, and always resync. 
*/ static atomic_t tsc_start_flag = ATOMIC_INIT(0); static atomic_t tsc_count_start = ATOMIC_INIT(0); static atomic_t tsc_count_stop = ATOMIC_INIT(0); static unsigned long long tsc_values[NR_CPUS]; #define NR_LOOPS 5 static void __init synchronize_tsc_bp (void) { int i; unsigned long long t0; unsigned long long sum, avg; long long delta; unsigned long one_usec; int buggy = 0; printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); /* convert from kcyc/sec to cyc/usec */ one_usec = cpu_khz / 1000; atomic_set(&tsc_start_flag, 1); wmb(); /* * We loop a few times to get a primed instruction cache, * then the last pass is more or less synchronized and * the BP and APs set their cycle counters to zero all at * once. This reduces the chance of having random offsets * between the processors, and guarantees that the maximum * delay between the cycle counters is never bigger than * the latency of information-passing (cachelines) between * two CPUs. */ for (i = 0; i < NR_LOOPS; i++) { /* * all APs synchronize but they loop on '== num_cpus' */ while (atomic_read(&tsc_count_start) != num_booting_cpus()-1) mb(); atomic_set(&tsc_count_stop, 0); wmb(); /* * this lets the APs save their current TSC: */ atomic_inc(&tsc_count_start); rdtscll(tsc_values[smp_processor_id()]); /* * We clear the TSC in the last loop: */ if (i == NR_LOOPS-1) write_tsc(0, 0); /* * Wait for all APs to leave the synchronization point: */ while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1) mb(); atomic_set(&tsc_count_start, 0); wmb(); atomic_inc(&tsc_count_stop); } sum = 0; for (i = 0; i < NR_CPUS; i++) { if (cpu_isset(i, cpu_callout_map)) { t0 = tsc_values[i]; sum += t0; } } avg = sum; do_div(avg, num_booting_cpus()); sum = 0; for (i = 0; i < NR_CPUS; i++) { if (!cpu_isset(i, cpu_callout_map)) continue; delta = tsc_values[i] - avg; if (delta < 0) delta = -delta; /* * We report bigger than 2 microseconds clock differences. 
*/ if (delta > 2*one_usec) { long realdelta; if (!buggy) { buggy = 1; printk("\n"); } realdelta = delta; do_div(realdelta, one_usec); if (tsc_values[i] < avg) realdelta = -realdelta; printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta); } sum += delta; } if (!buggy) printk("passed.\n"); } static void __init synchronize_tsc_ap (void) { int i; /* * Not every cpu is online at the time * this gets called, so we first wait for the BP to * finish SMP initialization: */ while (!atomic_read(&tsc_start_flag)) mb(); for (i = 0; i < NR_LOOPS; i++) { atomic_inc(&tsc_count_start); while (atomic_read(&tsc_count_start) != num_booting_cpus()) mb(); rdtscll(tsc_values[smp_processor_id()]); if (i == NR_LOOPS-1) write_tsc(0, 0); atomic_inc(&tsc_count_stop); while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb(); } } #undef NR_LOOPS extern void calibrate_delay(void); static atomic_t init_deasserted; void __init smp_callin(void) { int cpuid, phys_id, i; /* * If waken up by an INIT in an 82489DX configuration * we may get here before an INIT-deassert IPI reaches * our local APIC. We have to wait for the IPI or we'll * lock up on an APIC access. */ wait_for_init_deassert(&init_deasserted); /* * (This works even if the APIC is not enabled.) */ phys_id = GET_APIC_ID(apic_read(APIC_ID)); cpuid = smp_processor_id(); if (cpu_isset(cpuid, cpu_callin_map)) { printk("huh, phys CPU#%d, CPU#%d already present??\n", phys_id, cpuid); BUG(); } Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); /* * STARTUP IPIs are fragile beasts as they might sometimes * trigger some glue motherboard logic. Complete APIC bus * silence for 1 second, this overestimates the time the * boot CPU is spending to send the up to 2 STARTUP IPIs * by a factor of two. This should be enough. */ /* * Waiting 2s total for startup */ for (i = 0; i < 200; i++) { /* * Has the boot CPU finished it's STARTUP sequence? 
*/ if (cpu_isset(cpuid, cpu_callout_map)) break; rep_nop(); mdelay(10); } if (!cpu_isset(cpuid, cpu_callout_map)) { printk("BUG: CPU%d started up but did not get a callout!\n", cpuid); BUG(); } /* * the boot CPU has finished the init stage and is spinning * on callin_map until we finish. We are free to set up this * CPU, first the APIC. (this is probably redundant on most * boards) */ Dprintk("CALLIN, before setup_local_APIC().\n"); smp_callin_clear_local_apic(); setup_local_APIC(); map_cpu_to_logical_apicid(); #if 0 /* * Get our bogomips. */ calibrate_delay(); Dprintk("Stack at about %p\n",&cpuid); #endif /* * Save our processor parameters */ smp_store_cpu_info(cpuid); disable_APIC_timer(); /* * Allow the master to continue. */ cpu_set(cpuid, cpu_callin_map); /* * Synchronize the TSC with the BP */ if (cpu_has_tsc && cpu_khz) synchronize_tsc_ap(); calibrate_tsc_ap(); } int cpucount; #ifdef CONFIG_X86_32 static void construct_percpu_idt(unsigned int cpu) { unsigned char idt_load[10]; idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES); memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*sizeof(idt_entry_t)); *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1; *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu]; __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) ); } #endif /* * Activate a secondary processor. */ void __init start_secondary(void *unused) { unsigned int cpu = cpucount; extern void percpu_traps_init(void); set_current(idle_task[cpu]); set_processor_id(cpu); percpu_traps_init(); cpu_init(); smp_callin(); while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) rep_nop(); #ifdef CONFIG_X86_32 /* * At this point, boot CPU has fully initialised the IDT. It is * now safe to make ourselves a private copy. */ construct_percpu_idt(cpu); #endif setup_secondary_APIC_clock(); enable_APIC_timer(); /* * low-memory mappings have been cleared, flush them from * the local TLBs too. 
*/ local_flush_tlb(); cpu_set(smp_processor_id(), cpu_online_map); /* We can take interrupts now: we're officially "up". */ local_irq_enable(); init_percpu_time(); wmb(); startup_cpu_idle_loop(); } extern struct { void * esp; unsigned short ss; } stack_start; #ifdef CONFIG_NUMA /* which logical CPUs are on which nodes */ cpumask_t node_2_cpu_mask[MAX_NUMNODES] = { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; /* which node each logical CPU is on */ int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 }; EXPORT_SYMBOL(cpu_2_node); /* set up a mapping between cpu and node. */ static inline void map_cpu_to_node(int cpu, int node) { printk("Mapping cpu %d to node %d\n", cpu, node); cpu_set(cpu, node_2_cpu_mask[node]); cpu_2_node[cpu] = node; } /* undo a mapping between cpu and node. */ static inline void unmap_cpu_to_node(int cpu) { int node; printk("Unmapping cpu %d from all nodes\n", cpu); for (node = 0; node < MAX_NUMNODES; node ++) cpu_clear(cpu, node_2_cpu_mask[node]); cpu_2_node[cpu] = 0; } #else /* !CONFIG_NUMA */ #define map_cpu_to_node(cpu, node) ({}) #define unmap_cpu_to_node(cpu) ({}) #endif /* CONFIG_NUMA */ u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; void map_cpu_to_logical_apicid(void) { int cpu = smp_processor_id(); int apicid = logical_smp_processor_id(); cpu_2_logical_apicid[cpu] = apicid; map_cpu_to_node(cpu, apicid_to_node(apicid)); } void unmap_cpu_to_logical_apicid(int cpu) { cpu_2_logical_apicid[cpu] = BAD_APICID; unmap_cpu_to_node(cpu); } #if APIC_DEBUG static inline void __inquire_remote_apic(int apicid) { int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; char *names[] = { "ID", "VERSION", "SPIV" }; int timeout, status; printk("Inquiring remote APIC #%d...\n", apicid); for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) { printk("... APIC #%d %s: ", apicid, names[i]); /* * Wait for idle. 
*/ apic_wait_icr_idle(); apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); timeout = 0; do { udelay(100); status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); switch (status) { case APIC_ICR_RR_VALID: status = apic_read(APIC_RRR); printk("%08x\n", status); break; default: printk("failed\n"); } } } #endif #ifdef WAKE_SECONDARY_VIA_NMI /* * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this * won't ... remember to clear down the APIC, etc later. */ static int __init wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) { unsigned long send_status = 0, accept_status = 0; int timeout, maxlvt; /* Target chip */ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); /* Boot on the stack */ /* Kick the second */ apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); Dprintk("Waiting for send to finish...\n"); timeout = 0; do { Dprintk("+"); udelay(100); send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; } while (send_status && (timeout++ < 1000)); /* * Give the other CPU some time to accept the IPI. */ udelay(200); /* * Due to the Pentium erratum 3AP. */ maxlvt = get_maxlvt(); if (maxlvt > 3) { apic_read_around(APIC_SPIV); apic_write(APIC_ESR, 0); } accept_status = (apic_read(APIC_ESR) & 0xEF); Dprintk("NMI sent.\n"); if (send_status) printk("APIC never delivered???\n"); if (accept_status) printk("APIC delivery error (%lx).\n", accept_status); return (send_status | accept_status); } #endif /* WAKE_SECONDARY_VIA_NMI */ #ifdef WAKE_SECONDARY_VIA_INIT static int __init wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) { unsigned long send_status = 0, accept_status = 0; int maxlvt, timeout, num_starts, j; /* * Be paranoid about clearing APIC errors. 
*/ if (APIC_INTEGRATED(apic_version[phys_apicid])) { apic_read_around(APIC_SPIV); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); } Dprintk("Asserting INIT.\n"); /* * Turn INIT on target chip */ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); /* * Send IPI */ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); Dprintk("Waiting for send to finish...\n"); timeout = 0; do { Dprintk("+"); udelay(100); send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; } while (send_status && (timeout++ < 1000)); mdelay(10); Dprintk("Deasserting INIT.\n"); /* Target chip */ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); /* Send IPI */ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); Dprintk("Waiting for send to finish...\n"); timeout = 0; do { Dprintk("+"); udelay(100); send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; } while (send_status && (timeout++ < 1000)); atomic_set(&init_deasserted, 1); /* * Should we send STARTUP IPIs ? * * Determine this based on the APIC version. * If we don't have an integrated APIC, don't send the STARTUP IPIs. */ if (APIC_INTEGRATED(apic_version[phys_apicid])) num_starts = 2; else num_starts = 0; /* * Run STARTUP IPI loop. */ Dprintk("#startup loops: %d.\n", num_starts); maxlvt = get_maxlvt(); for (j = 1; j <= num_starts; j++) { Dprintk("Sending STARTUP #%d.\n",j); apic_read_around(APIC_SPIV); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); Dprintk("After apic_write.\n"); /* * STARTUP IPI */ /* Target chip */ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); /* Boot on the stack */ /* Kick the second */ apic_write_around(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12)); /* * Give the other CPU some time to accept the IPI. 
*/ udelay(300); Dprintk("Startup point 1.\n"); Dprintk("Waiting for send to finish...\n"); timeout = 0; do { Dprintk("+"); udelay(100); send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; } while (send_status && (timeout++ < 1000)); /* * Give the other CPU some time to accept the IPI. */ udelay(200); /* * Due to the Pentium erratum 3AP. */ if (maxlvt > 3) { apic_read_around(APIC_SPIV); apic_write(APIC_ESR, 0); } accept_status = (apic_read(APIC_ESR) & 0xEF); if (send_status || accept_status) break; } Dprintk("After Startup.\n"); if (send_status) printk("APIC never delivered???\n"); if (accept_status) printk("APIC delivery error (%lx).\n", accept_status); return (send_status | accept_status); } #endif /* WAKE_SECONDARY_VIA_INIT */ extern cpumask_t cpu_initialized; static int __init do_boot_cpu(int apicid) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. */ { struct domain *idle; struct vcpu *v; void *stack; unsigned long boot_error; int timeout, cpu; unsigned long start_eip; unsigned short nmi_high = 0, nmi_low = 0; cpu = ++cpucount; if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL ) panic("failed 'createdomain' for CPU %d", cpu); v = idle_task[cpu] = idle->vcpu[0]; set_bit(_DOMF_idle_domain, &idle->domain_flags); v->arch.monitor_table = mk_pagetable(__pa(idle_pg_table)); /* start_eip had better be page-aligned! */ start_eip = setup_trampoline(); /* So we see what's up */ printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); stack = alloc_xenheap_pages(STACK_ORDER); #if defined(__i386__) stack_start.esp = (void *)__pa(stack); #elif defined(__x86_64__) stack_start.esp = stack; #endif stack_start.esp += STACK_SIZE - sizeof(struct cpu_info); /* Debug build: detect stack overflow by setting up a guard page. 
*/ memguard_guard_stack(stack); /* * This grunge runs the startup process for * the targeted processor. */ atomic_set(&init_deasserted, 0); Dprintk("Setting warm reset code and vector.\n"); store_NMI_vector(&nmi_high, &nmi_low); CMOS_WRITE(0xa, 0xf); local_flush_tlb(); Dprintk("1.\n"); *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; Dprintk("2.\n"); *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; Dprintk("3.\n"); /* * Starting actual IPI sequence... */ boot_error = wakeup_secondary_cpu(apicid, start_eip); if (!boot_error) { /* * allow APs to start initializing. */ Dprintk("Before Callout %d.\n", cpu); cpu_set(cpu, cpu_callout_map); Dprintk("After Callout %d.\n", cpu); /* * Wait 5s total for a response */ for (timeout = 0; timeout < 50000; timeout++) { if (cpu_isset(cpu, cpu_callin_map)) break; /* It has booted */ udelay(100); } if (cpu_isset(cpu, cpu_callin_map)) { /* number CPUs logically, starting from 1 (BSP is 0) */ Dprintk("OK.\n"); printk("CPU%d: ", cpu); print_cpu_info(&cpu_data[cpu]); Dprintk("CPU has booted.\n"); } else { boot_error= 1; if (*((volatile unsigned char *)trampoline_base) == 0xA5) /* trampoline started but...? */ printk("Stuck ??\n"); else /* trampoline code not run */ printk("Not responding.\n"); inquire_remote_apic(apicid); } } x86_cpu_to_apicid[cpu] = apicid; if (boot_error) { /* Try to put things back the way they were before ... 
*/ unmap_cpu_to_logical_apicid(cpu); cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ cpucount--; } /* mark "stuck" area as not stuck */ *((volatile unsigned long *)trampoline_base) = 0; return boot_error; } #if 0 cycles_t cacheflush_time; unsigned long cache_decay_ticks; static void smp_tune_scheduling (void) { unsigned long cachesize; /* kB */ unsigned long bandwidth = 350; /* MB/s */ /* * Rough estimation for SMP scheduling, this is the number of * cycles it takes for a fully memory-limited process to flush * the SMP-local cache. * * (For a P5 this pretty much means we will choose another idle * CPU almost always at wakeup time (this is due to the small * L1 cache), on PIIs it's around 50-100 usecs, depending on * the cache size) */ if (!cpu_khz) { /* * this basically disables processor-affinity * scheduling on SMP without a TSC. */ cacheflush_time = 0; return; } else { cachesize = boot_cpu_data.x86_cache_size; if (cachesize == -1) { cachesize = 16; /* Pentiums, 2x8kB cache */ bandwidth = 100; } cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth; } cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1; printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n", (long)cacheflush_time/(cpu_khz/1000), ((long)cacheflush_time*100/(cpu_khz/1000)) % 100); printk("task migration cache decay timeout: %ld msecs.\n", cache_decay_ticks); } #else #define smp_tune_scheduling() ((void)0) #endif /* * Cycle through the processors sending APIC IPIs to boot each. 
*/

/* Logical APIC id of the boot CPU; assigned in smp_boot_cpus(). */
static int boot_cpu_logical_apicid;
/* Where the IO area was mapped on multiquad, always 0 otherwise */
void *xquad_portio;

/*
 * Per-CPU masks filled in by smp_boot_cpus(): for each CPU, the set of
 * CPUs sharing its cpu_core_id (sibling map) and the set sharing its
 * phys_proc_id (core map).
 */
cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_core_map);

/*
 * smp_boot_cpus - record boot-CPU state and start the other processors.
 * @max_cpus: upper bound on the number of CPUs to activate; 0 selects the
 *            uniprocessor path.
 *
 * Outline, as far as this function itself shows:
 *   1. Store/print boot CPU info and record its physical and logical
 *      APIC ids.
 *   2. Take the init_uniprocessor path when no SMP configuration (and no
 *      ACPI LAPIC) was found, when the local APIC is reported missing, or
 *      when max_cpus is 0.
 *   3. Otherwise set up the local APIC and call do_boot_cpu() for every
 *      present APIC id other than the boot CPU's.
 *   4. Restore the warm-reset CMOS code/vector, print a summary, build
 *      cpu_sibling_map[] and cpu_core_map[], then set up the IO-APIC,
 *      the APIC clock and the TSC.
 *
 * NOTE: the init_uniprocessor label sits inside the body of the first
 * "if" and is reached by goto from later code; keep that in mind before
 * reordering anything here.
 */
static void __init smp_boot_cpus(unsigned int max_cpus)
{
	int apicid, cpu, bit, kicked;
#ifdef BOGOMIPS
	unsigned long bogosum = 0;
#endif

	/*
	 * Setup boot CPU information
	 */
	smp_store_cpu_info(0); /* Final full version of the data */
	printk("CPU%d: ", 0);
	print_cpu_info(&cpu_data[0]);

	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
	boot_cpu_logical_apicid = logical_smp_processor_id();
	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;

	/*current_thread_info()->cpu = 0;*/
	smp_tune_scheduling();

	/* Until the full maps are built below, CPU 0 is its own sibling/core. */
	cpus_clear(cpu_sibling_map[0]);
	cpu_set(0, cpu_sibling_map[0]);

	cpus_clear(cpu_core_map[0]);
	cpu_set(0, cpu_core_map[0]);

	/*
	 * If we couldn't find an SMP configuration at boot time,
	 * get out of here now!
	 */
	if (!smp_found_config && !acpi_lapic) {
		printk(KERN_NOTICE "SMP motherboard not detected.\n");
	/* Shared uniprocessor fallback; also entered via goto from below. */
	init_uniprocessor:
		phys_cpu_present_map = physid_mask_of_physid(0);
		if (APIC_init_uniprocessor())
			printk(KERN_NOTICE "Local APIC not detected."
					   " Using dummy APIC emulation.\n");
		map_cpu_to_logical_apicid();
		cpu_set(0, cpu_sibling_map[0]);
		cpu_set(0, cpu_core_map[0]);
		return;
	}

	/*
	 * Should not be necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 * Makes no sense to do this check in clustered apic mode, so skip it
	 */
	if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
				boot_cpu_physical_apicid);
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * If we couldn't find a local APIC, then get out of here now!
	 */
	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
			boot_cpu_physical_apicid);
		goto init_uniprocessor;
	}

	verify_local_APIC();

	/*
	 * If SMP should be disabled, then really disable it!
	 */
	if (!max_cpus)
		goto init_uniprocessor;

	connect_bsp_APIC();
	setup_local_APIC();
	map_cpu_to_logical_apicid();

	setup_portio_remap();

	/*
	 * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
	 *
	 * In clustered apic mode, phys_cpu_present_map is constructed thus:
	 * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
	 * clustered apic ID.
	 */
	Dprintk("CPU present map: %lx\n",
		physids_coerce(phys_cpu_present_map));

	/*
	 * kicked is incremented once per successful do_boot_cpu() and bounds
	 * the loop at NR_CPUS. NOTE(review): the initial 1 presumably accounts
	 * for the boot CPU -- confirm.
	 */
	kicked = 1;
	for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
		apicid = cpu_present_to_apicid(bit);
		/*
		 * Don't even attempt to start the boot CPU!
		 */
		if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
			continue;

		if (!check_apicid_present(bit))
			continue;
		/* Stop starting CPUs once cpucount+1 has reached max_cpus. */
		if (max_cpus <= cpucount+1)
			continue;

		/* A non-zero return from do_boot_cpu() is treated as failure. */
		if (do_boot_cpu(apicid))
			printk("CPU #%d not responding - cannot use it.\n",
								apicid);
		else
			++kicked;
	}

	/*
	 * Install writable page 0 entry to set BIOS data area.
	 */
	local_flush_tlb();

	/*
	 * Paranoid: Set warm reset code and vector here back
	 * to default values.
	 */
	CMOS_WRITE(0, 0xf);

	*((volatile long *) phys_to_virt(0x467)) = 0;

#ifdef BOGOMIPS
	/*
	 * Allow the user to impress friends.
	 */
	Dprintk("Before bogomips.\n");
	/* Sum loops_per_jiffy over every CPU present in cpu_callout_map. */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu_isset(cpu, cpu_callout_map))
			bogosum += cpu_data[cpu].loops_per_jiffy;
	printk(KERN_INFO
		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
		cpucount+1,
		bogosum/(500000/HZ),
		(bogosum/(5000/HZ))%100);
#else
	printk("Total of %d processors activated.\n", cpucount+1);
#endif

	Dprintk("Before bogocount - setting activated=1.\n");

	if (smp_b_stepping)
		printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");

	/*
	 * Don't taint if we are running SMP kernel on a single non-MP
	 * approved Athlon
	 */
	if (tainted & TAINT_UNSAFE_SMP) {
		if (cpucount)
			printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
		else
			tainted &= ~TAINT_UNSAFE_SMP;
	}

	Dprintk("Boot done.\n");

	/*
	 * construct cpu_sibling_map[], so that we can tell sibling CPUs
	 * efficiently.
	 */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		cpus_clear(cpu_sibling_map[cpu]);
		cpus_clear(cpu_core_map[cpu]);
	}

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		struct cpuinfo_x86 *c = cpu_data + cpu;
		int siblings = 0;
		int i;
		/* Only CPUs recorded in cpu_callout_map get map entries. */
		if (!cpu_isset(cpu, cpu_callout_map))
			continue;

		if (smp_num_siblings > 1) {
			/* Siblings are the called-out CPUs with equal cpu_core_id. */
			for (i = 0; i < NR_CPUS; i++) {
				if (!cpu_isset(i, cpu_callout_map))
					continue;
				if (cpu_core_id[cpu] == cpu_core_id[i]) {
					siblings++;
					cpu_set(i, cpu_sibling_map[cpu]);
				}
			}
		} else {
			siblings++;
			cpu_set(cpu, cpu_sibling_map[cpu]);
		}

		/*
		 * On a mismatch the global smp_num_siblings is overwritten
		 * with the count just observed, which affects the remaining
		 * iterations of this loop.
		 */
		if (siblings != smp_num_siblings) {
			printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
			smp_num_siblings = siblings;
		}

		if (c->x86_num_cores > 1) {
			/* Core map: the called-out CPUs with equal phys_proc_id. */
			for (i = 0; i < NR_CPUS; i++) {
				if (!cpu_isset(i, cpu_callout_map))
					continue;
				if (phys_proc_id[cpu] == phys_proc_id[i]) {
					cpu_set(i, cpu_core_map[cpu]);
				}
			}
		} else {
			cpu_core_map[cpu] = cpu_sibling_map[cpu];
		}
	}

	if (nmi_watchdog == NMI_LOCAL_APIC)
		check_nmi_watchdog();

	/*
	 * Here we can be sure that there is an IO-APIC in the system. Let's
	 * go and set it up:
	 */
	if (!skip_ioapic_setup && nr_ioapics)
		setup_IO_APIC();

	setup_boot_APIC_clock();

	/*
	 * Synchronize the TSC with the AP
	 */
	if (cpu_has_tsc && cpucount && cpu_khz)
		synchronize_tsc_bp();
	/* Not guarded by the condition above: this call always runs. */
	calibrate_tsc_bp();
}

/* These are wrappers to interface to the new boot process.  Someone
   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */

/* Forwards max_cpus unchanged to smp_boot_cpus(). */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
	smp_boot_cpus(max_cpus);
}

/* Sets the calling CPU's bit in cpu_online_map and cpu_callout_map. */
void __devinit smp_prepare_boot_cpu(void)
{
	cpu_set(smp_processor_id(), cpu_online_map);
	cpu_set(smp_processor_id(), cpu_callout_map);
}

/*
 * __cpu_up - release an already-started CPU and wait for it to go online.
 * @cpu: logical CPU number.
 *
 * Returns -ENOSYS if @cpu is already in smp_commenced_mask, -EIO if it
 * never appeared in cpu_callin_map, and 0 once @cpu shows up in
 * cpu_online_map. The wait loop has no timeout.
 */
int __devinit __cpu_up(unsigned int cpu)
{
	/* This only works at boot for x86.  See "rewrite" above. */
	if (cpu_isset(cpu, smp_commenced_mask))
		return -ENOSYS;

	/* In case one didn't come up */
	if (!cpu_isset(cpu, cpu_callin_map))
		return -EIO;

	/* Unleash the CPU! */
	cpu_set(cpu, smp_commenced_mask);
	/* Spin until the CPU is online, running softirqs pending on CPU 0. */
	while (!cpu_isset(cpu, cpu_online_map)) {
		mb();
		if (softirq_pending(0))
			do_softirq();
	}
	return 0;
}

/*
 * smp_cpus_done - final step of the wrappers above.
 * @max_cpus: not used in this function.
 *
 * Depending on configuration, calls setup_ioapic_dest() and
 * zap_low_mappings(), then updates the trampoline's exec setting.
 */
void __init smp_cpus_done(unsigned int max_cpus)
{
#ifdef CONFIG_X86_IO_APIC
	setup_ioapic_dest();
#endif
#ifdef CONFIG_X86_64
	zap_low_mappings();
#endif
	/*
	 * Disable executability of the SMP trampoline:
	 */
	set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
}

#if 0
/* Compiled out: would install the three IPI gates listed below. */
void __init smp_intr_init(void)
{
	/*
	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
	 * IPI, driven by wakeup.
	 */
	set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);

	/* IPI for invalidation */
	set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);

	/* IPI for generic function call */
	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
}
#endif