Filter:   InfoImg
download net_filter.c
Language: C
Copyright: Copyright 1991-1998 by Open Software Foundation, Inc.
LOC: 520
Project Info
Hurd on Mach on PowerPC(hurdppc)
Server: Savannah NonGNU
Type: cvs
...pc\hurdppc\gnu‑osfmach\ppc\
   _setjmp.S
   alignment.c
   asm.h
   ast.h
   ast_types.h
   bcopy.S
   bits.S
   boot.h
   bzero.S
   cache.S
   clock.h
   console_feed.c
   console_feed_entries.h
   cpu_number.h
   cswtch.S
   db_disasm.c
   db_interface.c
   db_low_trace.c
   db_low_trace.h
   db_machdep.h
   db_trace.c
   endian.h
   exception.h
   Firmware.h
   Firmware.S
   FirmwareCalls.h
   fpu_protos.h
   gdb_defs.h
   genassym.c
   go.c
   hardclock_entries.h
   hw_exception.S
   hw_lock.S
   interrupt.c
   io_map.c
   io_map_entries.h
   iobus.h
   iso_font.h
   kgdb_defs.h
   kgdb_interface.c
   kgdb_setjmp.h
   klglue.S
   lock.h
   low_trace.h
   lowmem_vectors.S
   mach_param.h
   machine_routines.h
   machine_rpc.h
   machlimits.h
   machparam.h
   mem.c
   mem.h
   misc.c
   misc_asm.S
   misc_protos.h
   model_dep.c
   movc.S
   mp.h
   MPinterfaces.S
   ndr_def.h
   net_filter.c
   new_screen.h
   pcb.c
   pmap.c
   pmap.c-broken
   pmap.h
   pmap_internals.h
   ppc_disasm.i
   ppc_init.c
   ppc_rpc.c
   proc_reg.h
   screen.h
   screen_defs.h
   screen_switch.h
   serial_console.c
   serial_defs.h
   setjmp.h
   spl.h
   start.S
   status.c
   strlen.c
   stubs.c
   syscall_subr.h
   task.h
   testjump.c
   thread.h
   thread_act.h
   trap.c
   trap.h
   vm_tuning.h
   xpr.h

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
/*
 * Copyright 1991-1998 by Open Software Foundation, Inc. 
 *              All Rights Reserved 
 *  
 * Permission to use, copy, modify, and distribute this software and 
 * its documentation for any purpose and without fee is hereby granted, 
 * provided that the above copyright notice appears in all copies and 
 * that both the copyright notice and this permission notice appear in 
 * supporting documentation. 
 *  
 * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE 
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
 * FOR A PARTICULAR PURPOSE. 
 *  
 * IN NO EVENT SHALL OSF BE LIABLE FOR ANY SPECIAL, INDIRECT, OR 
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT, 
 * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION 
 * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
 * 
 */
/*
 * MkLinux
 */

#include <device/net_io.h>

#if NET_FILTER_COMPILER


#define USE_EXTRA_REGS 0

#define REG_ZERO	0	/* Register we keep equal to 0. */
#define REG_DATAADDR	3	/* Address of packet data, and filter return. */
#define REG_DATALEN	4	/* Length of packet data in two-byte units. */
#define REG_HDRADDR	5	/* Address of header data. */
#define REG_RET		3	/* Where to put return value. */

/* Originally we dealt in virtual register numbers which were essentially
   indexes into this array, and only converted to machine register numbers
   when emitting instructions.  But that meant a lot of conversions, so
   instead we deal with machine register numbers all along, even though this
   means wasting slots in the regs[] array.  */
const unsigned char scratchregs[] = {
    6, 7, 8, 9, 10, 11, 12,
#if USE_EXTRA_REGS	/* Callee-saves regs available if we save them. */
#define INITIAL_NSCRATCHREGS 8	/* Number of registers above. */
    #error not yet written
#endif
};
#define NSCRATCHREGS (sizeof scratchregs / sizeof scratchregs[0])
#define NREGS 32
#define NO_REG 1	/* This is the stack pointer!  Flag value.  */

#define MAX_LI		0x7fff	/* Max unsigned value in an LI. */

#define BCLR(cond)	((19 << 26) | (cond) | (16 << 1))
#define BLR()		BCLR(COND_ALWAYS)
#define BC(cond, off)	((16 << 26) | (cond) | ((off) << 2))
#define COND(BO, BI)	(((BO) << (16 + 5)) | ((BI) << 16))
#define COND_ALWAYS	COND(COND_IF_ALWAYS, 0)
#define COND_EQ		COND(COND_IF_TRUE, COND_BIT(0, BIT_EQ))
#define COND_NE		COND(COND_IF_FALSE, COND_BIT(0, BIT_EQ))
#define COND_LE		COND(COND_IF_FALSE, COND_BIT(0, BIT_GT))
#define COND_GE		COND(COND_IF_FALSE, COND_BIT(0, BIT_LT))
#define COND_BIT(crf, bit) \
			((crf) * 4 + (bit))
#define BIT_EQ		2
#define BIT_GT		1
#define BIT_LT		0
#define COND_IF_FALSE	0x04
#define COND_IF_TRUE	0x0c
#define COND_IF_ALWAYS	0x14

/* For arithmetical instructions, a is the dest and b is the source;
   for logical instructions, a is the source and b is the dest.  Ho hum.  */
#define IMMED(opcode, a, b, imm) \
			(((opcode) << 26) | ((a) << 21) | ((b) << 16) | \
			 ((imm) & 0xffff))
#define ADDI(dst, src, imm) \
			IMMED(14, dst, src, imm)
#define ADDIC(dst, src, imm) \
			IMMED(12, dst, src, imm)
#define SUBFIC(dst, src, imm) \
			IMMED(8, dst, src, imm)
#define LI(dst, imm)	ADDI(dst, 0, (imm))
#define ANDI(dst, src, imm) \
			IMMED(28, src, dst, imm)
#define ORI(dst, src, imm) \
			IMMED(24, src, dst, imm)
#define XORI(dst, src, imm) \
			IMMED(26, src, dst, imm)

#define CMPL(lhs, rhs)	((31 << 26) | ((lhs) << 16) | ((rhs) << 11) | (32 << 1))
#define CMPLI(lhs, imm)	((10 << 26) | ((lhs) << 16) | ((imm) & 0xffff))

#define INTEGER_OP(opcode, a, b, c) \
			((31 << 26) | ((a) << 21) | ((b) << 16) | \
			 ((c) << 11) | ((opcode) << 1))
#define ARITH_OP(opcode, dst, lhs, rhs) \
			INTEGER_OP(opcode, dst, lhs, rhs)
#define ADD(dst, lhs, rhs) \
			ARITH_OP(OP_ADD, dst, lhs, rhs)
#define ADDE(dst, lhs, rhs) \
			ARITH_OP(OP_ADDE, dst, lhs, rhs)
#define SUBF(dst, lhs, rhs) \
			ARITH_OP(OP_SUBF, dst, lhs, rhs)
#define SUBFC(dst, lhs, rhs) \
			ARITH_OP(OP_SUBFC, dst, lhs, rhs)
#define SUBFE(dst, lhs, rhs) \
			ARITH_OP(OP_SUBFE, dst, lhs, rhs)
#define LOGIC_OP(opcode, dst, lhs, rhs) \
			INTEGER_OP(opcode, lhs, dst, rhs)
#define OR(dst, lhs, rhs) \
			LOGIC_OP(OP_OR, dst, lhs, rhs)
#define XOR(dst, lhs, rhs) \
			LOGIC_OP(OP_XOR, dst, lhs, rhs)
#define OP_ADD 		266
#define OP_ADDE		138
#define OP_AND		28
#define OP_OR		444
#define OP_SRW		536
#define OP_SUBF		40
#define OP_SUBFC	8
#define OP_SUBFE	136
#define OP_XOR		316
#define MR(dst, src)	OR(dst, src, src)

#define LHZ(dst, base, offset) \
			((40 << 26) | ((dst) << 21) | ((base) << 16) | \
			 ((offset) & 0xffff))
#define LHZX(dst, base, index) \
			INTEGER_OP(279, dst, base, index)
#define MFCR(dst)	INTEGER_OP(19, dst, 0, 0)

#define RLWINM(dst, src, shiftimm, mbegin, mend) \
			((21 << 26) | ((src) << 21) | ((dst) << 16) | \
			 ((shiftimm) << 11) | ((mbegin) << 6) | ((mend) << 1))
#define RLWNM(dst, src, shiftreg, mbegin, mend) \
			((23 << 26) | ((src) << 21) | ((dst) << 16) | \
			 ((shiftreg) << 11) | ((mbegin) << 6) | ((mend) << 1))

/* Every NETF_arg generates at most four instructions (4 for PUSHIND).
   Every NETF_op generates at most 3 instructions (3 for EQ and NEQ).  */
#define MAX_INSTR_PER_ARG 4
#define MAX_INSTR_PER_OP  3
#define MAX_INSTR_PER_ITEM (MAX_INSTR_PER_ARG + MAX_INSTR_PER_OP)
int junk_filter[MAX_INSTR_PER_ITEM];

enum {NF_LITERAL, NF_HEADER, NF_DATA};
struct common {	/* Keeps track of values we might want to avoid reloading. */
    char type;	/* NF_LITERAL: immediate; NF_HEADER: header word;
		   NF_DATA: data word. */
    char nuses;	/* Number of remaining uses for this value. */
    unsigned char reg;
		/* Register this value is currently in, or NO_REG if none. */
    unsigned short value;
		/* Immediate value or header or data offset. */
};
struct reg {	/* Keeps track of the current contents of registers. */
    unsigned char commoni;
			/* Index in common[] of the contained value. */
#define NOT_COMMON_VALUE NET_MAX_FILTER	/* When not a common[] value. */
    unsigned char stacktimes;
			/* Number of times register appears in stack. */
};
struct local {	/* Gather local arrays so we could kalloc() if needed.  */
    struct common common[NET_MAX_FILTER];	/* Potentially common values. */
    struct reg regs[NREGS];			/* Register statuses. */
    unsigned char commonpos[NET_MAX_FILTER];	/* Index in common[] for the
						   value loaded in each filter
						   command. */
    unsigned char stackregs[NET_FILTER_STACK_DEPTH];
						/* Registers making up the
						   stack. */
#if USE_EXTRA_REGS
    unsigned char maxreg;
#endif
};

int allocate_register(struct local *s, int commoni);
int compile_preamble(int *instructions, struct local *s);

/* Compile a packet filter into POWERPC machine code.  We do everything in
   the 7 caller-saves registers listed in scratchregs[], except when
   USE_EXTRA_REGS is defined, in which case we may also allocate callee-
   saves registers if needed.  (Not yet implemented on PPC.)

   Rather than maintaining an explicit stack in memory, we allocate registers
   dynamically to correspond to stack elements -- we can do this because we
   know the state of the stack at every point in the filter program.  We also
   attempt to keep around in registers values (immediates, or header or data
   words) that are used later on, to avoid having to load them again.
   Since there are only 7 registers being used, we might be forced to reload
   a value that we could have kept if we had more.  We might even be unable
   to contain the stack in the registers, in which case we return failure and
   cause the filter to be interpreted by net_do_filter().  But for all current
   filters I looked at, 7 registers is enough even to avoid reloads.  When
   USE_EXTRA_REGS is defined there are about 28 available registers, which is
   plenty.

   We depend heavily on NET_MAX_FILTER and NET_FILTER_STACK_DEPTH being
   small.  We keep indexes to arrays sized by them in char-sized fields,
   originally because we tried allocating these arrays on the stack.
   Even then we overflowed the small (4K) kernel stack, so we were forced
   to allocate the arrays dynamically, which is the reason for the existence
   of `struct local'.

   We also depend on the filter being logically correct, for instance not
   being longer than NET_MAX_FILTER or underflowing its stack.  This is
   supposed to have been checked by parse_net_filter() before the filter
   is compiled.

   We are supposed to return 1 (TRUE) if the filter accepts the packet
   and 0 (FALSE) otherwise.  In fact, we may return any non-zero value
   for true, which is sufficient for our caller and convenient for us.

   There are lots and lots of optimisations that we could do but don't.
   This is supposedly a *micro*-kernel, after all.  Here are some things
   that could be added without too much headache:
   - Using the condition register.  We go to a lot of trouble to generate
     integer truth values for EQ etc, but most of the time those values
     are just ANDed or ORed together or used as arguments to COR etc.  So
     we could compute the comparison values directly into CR bits and
     operate on them using the CR logical instructions without (most of
     the time) ever having to generate integer equivalents.
   - More registers.  We could note the last uses of r3, r4, and
     r5, and convert them to general registers after those uses.  But if
     register shortage turns out to be a problem it is probably best just
     to define USE_EXTRA_REGS and have done with it.
   - Minimising range checks.  Every time we refer to a word in the data
     part, we generate code to ensure that it is within bounds.  But often
     the truth of these tests is implied by earlier tests.  Instead, at the
     start of the filter and after every COR or CNAND we could insert
     a single check when that is necessary.  (After CAND and CNOR we don't
     need to check since if they terminate it will be to return FALSE
     anyway so all we'd do would be to return it prematurely.)
   - Remembering immediate values.  Instead of generating code as soon as we
     see a PUSHLIT, we could remember that value and only generate code when
     it is used.  This would enable us to generate certain shorter
     instructions (like addi) that incorporate the immediate value instead
     of ever putting it in a register.
 */

filter_fct_t
net_filter_alloc(filter_t *filter, unsigned int size, unsigned int *lenp)
{
    struct local *s;
    int len, oldi, i, j, t, ncommon, sp;
    int type, value, arg, op, reg, reg1, dst, commoni;
    int returnfalseoffset;
    int *instructions, *instp, *returnfalse;
#if USE_EXTRA_REGS
    int oldmaxreg;
#endif
    boolean_t compiling;

#define SCHAR_MAX 127	/* machine/machlimits->h, anyone? */
    assert(NET_MAX_FILTER <= SCHAR_MAX);
    assert(NET_FILTER_STACK_DEPTH <= SCHAR_MAX);
    assert(NREGS <= SCHAR_MAX);

    assert(size < NET_MAX_FILTER);

    s = (struct local *) kalloc(sizeof *s);

#if USE_EXTRA_REGS
    s->maxreg = INITIAL_NSCRATCHREGS;
#endif
    len = 0;
    compiling = FALSE;
    returnfalse = junk_filter;

    /* This loop runs at least twice, once with compiling==FALSE to determine
       the length of the instructions we will compile, and once with
       compiling==TRUE to compile them.  The code generated on the two passes
       must be the same.  In the USE_EXTRA_REGS case, the loop can be re-run
       an extra time while !compiling, if we decide to use the callee-saves
       registers.  This is because we may be able to generate better code with
       the help of these registers than before.  */
    while (1) {

	/* Identify values that we can potentially preserve in a register to
	   avoid having to reload them.  All immediate values and references to
	   known offsets in the header or data are candidates.  The results of
	   this loop are the same on every run, so with a bit of work we
	   could run it just once; but this is not a time-critical
	   application.  */
	ncommon = 0;
	for (i = 0; i < size; i++) {
	    oldi = i;
	    arg = NETF_ARG(filter[i]);
	    if (arg == NETF_PUSHLIT) {
		type = NF_LITERAL;
		value = filter[++i];
	    } else if (arg >= NETF_PUSHSTK) {
		continue;
	    } else if (arg >= NETF_PUSHHDR) {
		type = NF_HEADER;
		value = arg - NETF_PUSHHDR;
	    } else if (arg >= NETF_PUSHWORD) {
		type = NF_DATA;
		value = arg - NETF_PUSHWORD;
	    } else {
		continue;
	    }
	    for (j = 0; j < ncommon; j++) {
		if (s->common[j].type == type && s->common[j].value == value) {
		    s->common[j].nuses++;
		    break;
		}
	    }
	    if (j == ncommon) {
		s->common[j].type = type;
		s->common[j].value = value;
		s->common[j].nuses = 1;
		ncommon++;
	    }
	    s->commonpos[oldi] = j;
	}

#if USE_EXTRA_REGS
	oldmaxreg = s->maxreg;
#endif

	/* Initially, no registers hold common values or are on the stack.  */
	for (i = 0; i < ncommon; i++)
	    s->common[i].reg = NO_REG;
	for (i = 0; i < NSCRATCHREGS; i++) {
	    s->regs[scratchregs[i]].commoni = NOT_COMMON_VALUE;
	    s->regs[scratchregs[i]].stacktimes = 0;
	}

	/* Now read through the filter and generate code. */
	sp = -1;	/* sp points to top element */
	for (i = 0; i < size; i++) {
	    if (!compiling)
		instp = junk_filter;

	    assert(sp >= -1);
	    assert(sp < NET_FILTER_STACK_DEPTH - 1);
	    commoni = s->commonpos[i];
	    arg = NETF_ARG(filter[i]);
	    op = NETF_OP(filter[i]);

	    /* Generate code to get the required value into a register and
	       set `reg' to the number of this register. */
	    switch (arg) {
	    case NETF_PUSHLIT:
		value = filter[++i];
		reg = s->common[commoni].reg;
		if (reg == NO_REG) {
		    if ((reg = allocate_register(s, commoni)) == NO_REG)
			goto fail;
		    assert(value >= 0);	/* Comes from unsigned short. */
		    *instp++ = ORI(reg, REG_ZERO, value);
		}
		s->common[commoni].nuses--;
		break;
	    case NETF_NOPUSH:
		reg = s->stackregs[sp--];
		s->regs[reg].stacktimes--;
		break;
	    case NETF_PUSHZERO:
		reg = REG_ZERO;
		break;
	    case NETF_PUSHIND:
	    case NETF_PUSHHDRIND:
		reg1 = s->stackregs[sp--];
		s->regs[reg1].stacktimes--;
		if (arg == NETF_PUSHIND)
		    *instp++ = CMPL(reg1, REG_DATALEN);
		else
		    *instp++ = CMPLI(reg1,
				     NET_HDW_HDR_MAX/sizeof (unsigned short));
		*instp = BC(COND_GE, returnfalse - instp);
		instp++;
		if ((reg = allocate_register(s, -1)) == NO_REG)
		    goto fail;
		*instp++ = ADD(reg, reg1, reg1);
		*instp++ = LHZX(reg, (arg == NETF_PUSHIND) ?
					REG_DATAADDR : REG_HDRADDR, reg);
		break;
	    default:
		if (arg >= NETF_PUSHSTK)
		    reg = s->stackregs[sp - (arg - NETF_PUSHSTK)];
		else if (arg >= NETF_PUSHWORD) {
		    assert(2 * (NETF_PUSHHDR - NETF_PUSHWORD) <= MAX_LI);
		    assert(NETF_PUSHSTK - NETF_PUSHHDR <= MAX_LI);
		    reg = s->common[commoni].reg;
		    if (reg == NO_REG) {
			if ((reg = allocate_register(s, commoni)) == NO_REG)
			    goto fail;
			if (arg < NETF_PUSHHDR) {
			    value = arg - NETF_PUSHWORD;
			    *instp++ = CMPLI(REG_DATALEN, value);
			    *instp = BC(COND_LE, returnfalse - instp);
			    instp++;
			    reg1 = REG_DATAADDR;
			} else {
			    value = arg - NETF_PUSHHDR;
			    reg1 = REG_HDRADDR;
			}
			*instp++ = LHZ(reg, reg1, 2 * value);
		    }
		    s->common[commoni].nuses--;
		}
	    }

	    /* Now generate code to do `op' on `reg1' (lhs) and `reg' (rhs). */
	    if (op != NETF_NOP) {
		reg1 = s->stackregs[sp--];
		s->regs[reg1].stacktimes--;
	    }
	    switch (op) {
	    case NETF_OP(NETF_CAND):
	    case NETF_OP(NETF_COR):
	    case NETF_OP(NETF_CNAND):
	    case NETF_OP(NETF_CNOR):
		dst = -1;
	    case NETF_OP(NETF_NOP):
		break;
	    default:
		/* Allocate a register to put the result in. */
		if ((dst = allocate_register(s, -1)) == NO_REG)
		    goto fail;
	    }
	    switch (op) {
	    case NETF_OP(NETF_NOP):
		dst = reg;
		break;
	    case NETF_OP(NETF_EQ):
	    case NETF_OP(NETF_NEQ):
		/* We arrange for the truth value to end up in the carry
		   flag and then put it in the destination register by
		   adding-with-carry zero to itself.  To set the carry, we
		   first make a value `x' that is zero if the values are
		   equal; this is either their XOR, or, if we know the
		   rhs is 0, the lhs.  Then to set the carry only when
		   x==0 we do `subfic dst,x,0' (subtract x from 0, setting
		   carry as not-borrow, so set only if x==0); to set it when
		   x!=0 we do `addic dst,x,-1' (add -1 to x setting carry,
		   so set unless x==0).  We're only interested in the carry
		   from these operations, not dst.
		   We don't test if reg1==REG_ZERO since in practice you
		   write NETF_PUSHLIT|NETF_EQ; the other order is eccentric
		   so you get an extra instruction, tough.  */
		if (reg == REG_ZERO)
		    t = reg1;
		else {
		    *instp++ = XOR(dst, reg1, reg);
		    t = dst;
		}
		*instp++ = (op == NETF_OP(NETF_EQ)) ?
			    SUBFIC(dst, t, 0) : ADDIC(dst, t, -1);
		*instp++ = ADDE(dst, REG_ZERO, REG_ZERO);
		break;
	    case NETF_OP(NETF_LT):
		/* LT and GT take advantage of the fact that all numbers are
		   16-bit quantities, so the sign bit after a subtraction
		   is a reliable indication of the relative magnitudes of
		   the operands.  */
		*instp++ = SUBF(dst, reg, reg1);	/* dst = reg1 - reg */
		*instp++ = RLWINM(dst, dst, 1, 31, 31);	/* sign bit */
		break;
	    case NETF_OP(NETF_GT):
		*instp++ = SUBF(dst, reg1, reg);	/* dst = reg - reg1 */
		*instp++ = RLWINM(dst, dst, 1, 31, 31);	/* sign bit */
		break;
	    case NETF_OP(NETF_LE):
		/* LE and GE use the carry (= not-borrow) flag.  When doing
		   a - b, there is a borrow if b > a, so carry if b <= a. */
		*instp++ = SUBFC(dst, reg1, reg);	/* dst = reg - reg1 */
		*instp++ = ADDE(dst, REG_ZERO, REG_ZERO);/* ca if reg1 <= reg */
		break;
	    case NETF_OP(NETF_GE):
		*instp++ = SUBFC(dst, reg, reg1);	/* dst = reg1 - reg */
		*instp++ = ADDE(dst, REG_ZERO, REG_ZERO);/* ca if reg <= reg1 */
		break;
	    case NETF_OP(NETF_AND):
		j = OP_AND;
		goto logical;
	    case NETF_OP(NETF_OR):
		j = OP_OR;
		goto logical;
	    case NETF_OP(NETF_XOR):
		j = OP_XOR;
		goto logical;
	    case NETF_OP(NETF_RSH):
		j = OP_SRW;
logical:
		*instp++ = LOGIC_OP(j, dst, reg1, reg);
		break;
	    case NETF_OP(NETF_ADD):
		j = OP_ADD;
		goto arithmetical;
	    case NETF_OP(NETF_SUB):
		j = OP_SUBF;	/* First reg subtracted from second. */
arithmetical:
		*instp++ = ARITH_OP(j, dst, reg, reg1);
		*instp++ = ANDI(dst, dst, 0xffff);
		break;
	    case NETF_OP(NETF_LSH):
		*instp++ = RLWNM(dst, reg1, reg, 16, 31);
		break;
	    case NETF_OP(NETF_COR):
	    case NETF_OP(NETF_CNAND):
		*instp++ = CMPL(reg1, reg);
		*instp++ = BCLR((op == NETF_OP(NETF_COR)) ? COND_EQ : COND_NE);
		break;
	    case NETF_OP(NETF_CAND):
	    case NETF_OP(NETF_CNOR):
		*instp++ = CMPL(reg1, reg);
		*instp = BC((op == NETF_OP(NETF_CAND)) ? COND_NE : COND_EQ,
			    returnfalse - instp);
		instp++;
		break;
	    default:
		printf("op == 0x%x\n", op);
		panic("net_filter_alloc: bad op");
		/* Should have been caught by parse_net_filter(). */
	    }
	    /* If the op generated a result, push it on the stack. */
	    if (dst >= 0) {
		s->stackregs[++sp] = dst;
		s->regs[dst].stacktimes++;
	    }
	    if (!compiling) {
		assert(instp - junk_filter <= MAX_INSTR_PER_ITEM);
		len += instp - junk_filter;
	    }
	}
	if (compiling) {
	    /* If the stack contains any values, we are supposed to return 0 or
	       1 according as the top-of-stack is zero or not.  Since the only
	       place we are called requires just zero-false/nonzero-true, we
	       simply copy the value into r3.  If the stack is empty, we
	       leave the pointer value r3 intact to return TRUE.  */
	    if (sp >= 0)
		*instp++ = MR(REG_RET, s->stackregs[sp]);
	    *instp++ = BLR();
	    /* Branch here to return false.  We could avoid adding these
	       instructions if they are not used, but practically every
	       filter does use them (for failure values when trying to
	       access values beyond the header or data length) so it's
	       not worth the effort.  */
	    assert(instp == returnfalse);
	    *instp++ = LI(REG_RET, 0);
	    *instp++ = BLR();
	    break;
	} else {
	    len += 1 + (sp >= 0);
			/* For the reach-the-end return instruction(s).  */
#if USE_EXTRA_REGS
	    if (s->maxreg > oldmaxreg) {
		len = 0;
		continue;
	    }
#endif
	    len += compile_preamble(NULL, s);
	    returnfalseoffset = len;
	    len += 2;	/* For the return-false instructions.  */
	}
	if ((instructions = (int *) kalloc(len * sizeof (int))) == NULL)
	    return NULL;
	returnfalse = instructions + returnfalseoffset;
	instp = instructions;
	instp += compile_preamble(instp, s);
	compiling = TRUE;
    }

    assert(instp - instructions == len);
    *lenp = len * sizeof (int);
    {
	kern_return_t kr;
	vm_machine_attribute_val_t val = MATTR_VAL_CACHE_SYNC;

	kr = pmap_attribute(kernel_pmap, (vm_offset_t) instructions,
			    len * sizeof (int), MATTR_CACHE, &val);
	if (kr != KERN_SUCCESS) {
	    printf("net_filter_alloc: pmap_attribute -> 0x%x\n", kr);
	    return NULL;
	}
    }
    kfree((vm_offset_t) s, sizeof *s);
    return (filter_fct_t) instructions;
fail:
    assert(!compiling);
    kfree((vm_offset_t) s, sizeof *s);
    printf("net_filter_alloc: failed to compile (filter too complex)\n");
    printf("-- will work, but more slowly; consider enabling USE_EXTRA_REGS\n");
    return NULL;
}


/* Allocate a register.  Registers that are already being used to make up
   the virtual stack are ineligible.  Among the others, we choose the one
   whose value has the least number of subsequent uses (ideally, and
   usually, 0) of the common value it already holds.  If commoni is >=
   0, it is the index in common[] of the value we are going to put in
   the allocated register, so we can update the various data structures
   appropriately.  */
int
allocate_register(struct local *s, int commoni)
{
    int i, reg, bestreg, nuses, bestregnuses, maxreg;

    bestreg = NO_REG;
#if USE_EXTRA_REGS
    maxreg = s->maxreg;
#else
    maxreg = NSCRATCHREGS;
#endif
    while (1) {
	bestregnuses = NOT_COMMON_VALUE;
	for (i = 0; i < maxreg; i++) {
	    reg = scratchregs[i];
	    if (s->regs[reg].stacktimes == 0) {
		nuses = (s->regs[reg].commoni == NOT_COMMON_VALUE) ?
			0 : s->common[s->regs[reg].commoni].nuses;
		if (nuses < bestregnuses) {
		    bestreg = reg;
		    bestregnuses = nuses;
		}
	    }
	}
	if (bestreg != NO_REG)
	    break;
#if USE_EXTRA_REGS
	if (maxreg == NSCRATCHREGS)
	    return NO_REG;
	s->maxreg = ++maxreg;
#else
	return NO_REG;
#endif
    }
    if (bestregnuses > 0)
	printf("net_filter_alloc: forced to reallocate r%d\n", bestreg);
	/* With USE_EXTRA_REGS, we could push up the number of registers
	   here to have one extra available for common values, but it's usually
	   not worth the overhead of the extra save-and-restore in the preamble.
	   Anyway, this never happens with typical filters.  */
    if (s->regs[bestreg].commoni != NOT_COMMON_VALUE)
	s->common[s->regs[bestreg].commoni].reg = NO_REG;
    if (commoni >= 0) {
	s->regs[bestreg].commoni = commoni;
	s->common[commoni].reg = bestreg;
    } else
	s->regs[bestreg].commoni = NOT_COMMON_VALUE;
    return bestreg;
}


#define FIXED_PREAMBLE_INSTRUCTIONS 1

int
compile_preamble(int *instructions, struct local *s)
{
    int *instp;
    int len = FIXED_PREAMBLE_INSTRUCTIONS;
#if USE_EXTRA_REGS
#error this hp code must be ported to the ppc
    int extra_regs, i, j, t, disp;

    extra_regs = s->maxreg - INITIAL_NSCRATCHREGS;
    if (extra_regs > 0) {
	len = extra_regs * 2 + 4;
	/* stw rp | (n-1) * stw | bl | stw | ldw rp | (n-1) * ldw | bv | ldw */
    } else
	return 0;
#endif
    if (instructions == NULL)
	return len;
    instp = instructions;

    *instp++ = LI(REG_ZERO, 0);
    assert(instp - instructions == FIXED_PREAMBLE_INSTRUCTIONS);

#if USE_EXTRA_REGS
#error this hp code must be ported to the ppc
    /* Generate a wrapper function to save the callee-saves registers
       before invoking the filter code we have generated.  It would be
       marginally better to have the filter branch directly to the
       postamble code on return, but the difference is trivial and it
       is easier to have it always branch to (rp).  */
#define FRAME_SIZE 128	/* This is plenty without being excessive. */
    *instp++ = STW_NEG(REG_RTN, 20, REG_SP);		/* stw rp,-20(sp) */
    i = INITIAL_NSCRATCHREGS;
    t = STWM(scratchregs[i], FRAME_SIZE, REG_SP);	/* stwm r3,128(sp) */
    j = FRAME_SIZE;
    while (++i < s->maxreg) {
	*instp++ = t;
	j -= sizeof (int);
	t = STW_NEG(scratchregs[i], j, REG_SP);		/* stw r4,-124(sp) &c */
    }
    disp = extra_regs + 2;	/* n * ldw | bv | ldw rp */
    *instp++ = BL(disp, REG_RTN);			/* bl filter,rp */
    *instp++ = t;					/* stw in delay slot */
    *instp++ = LDW_NEG(FRAME_SIZE + 20, REG_SP, REG_RTN);
							/* ldw -148(sp),rp */
    while (--i > INITIAL_NSCRATCHREGS) {
	*instp++ = LDW_NEG(j, REG_SP, scratchregs[i]);	/* ldw -124(sp),r4 &c */
	j += sizeof (int);
    }
    *instp++ = BV(0, REG_RTN);				/* bv (rp) */
    *instp++ = LDWM_NEG(FRAME_SIZE, REG_SP, scratchregs[i]);
							/* ldwm -128(sp),r3
							   in delay slot */
#endif

    assert(instp - instructions == len);
    return len;
}

void
net_filter_free(filter_fct_t fp, unsigned int len)
{
    kfree((vm_offset_t) fp, len);
}

#else	/* NET_FILTER_COMPILER */

/*
 *	Compilation of a source network filter into ppc instructions
 *	- a small version that doesnt do anything, but doesn't take
 *	up any space either. Note that if using a single mklinux server
 *	with ethertalk enabled (standard situation), the filter passes
 *	everything through so no need to compile one. If running multi
 *      servers then there is more of a need. Ethertalk (in linux server)
 *      should really have a packet filter, but at time of writing
 *	it does not.
 */
filter_fct_t
net_filter_alloc(
	filter_t	*fpstart,
	unsigned int	fplen,
	unsigned int	*len)
{
	*len = 0;
	return ((filter_fct_t)0);
}

void
net_filter_free(
	filter_fct_t	fp,
	unsigned int	len)
{
	assert(fp == (filter_fct_t)0 && len == 0);
}
#endif	/* NET_FILTER_COMPILER */