mirror of
https://gitee.com/bianbu-linux/linux-6.6
synced 2025-04-24 14:07:52 -04:00
[IA64] implement csum_ipv6_magic for ia64.
The asm version is 4.4 times faster than the generic C version and 10X smaller in code size. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
This commit is contained in:
parent
5b4d5681ff
commit
007d77d0c5
2 changed files with 59 additions and 2 deletions
|
@ -8,8 +8,8 @@
|
||||||
* in0: address of buffer to checksum (char *)
|
* in0: address of buffer to checksum (char *)
|
||||||
* in1: length of the buffer (int)
|
* in1: length of the buffer (int)
|
||||||
*
|
*
|
||||||
* Copyright (C) 2002 Intel Corp.
|
* Copyright (C) 2002, 2006 Intel Corp.
|
||||||
* Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
|
* Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <asm/asmmacro.h>
|
#include <asm/asmmacro.h>
|
||||||
|
@ -25,6 +25,9 @@
|
||||||
|
|
||||||
#define in0 r32
|
#define in0 r32
|
||||||
#define in1 r33
|
#define in1 r33
|
||||||
|
#define in2 r34
|
||||||
|
#define in3 r35
|
||||||
|
#define in4 r36
|
||||||
#define ret0 r8
|
#define ret0 r8
|
||||||
|
|
||||||
GLOBAL_ENTRY(ip_fast_csum)
|
GLOBAL_ENTRY(ip_fast_csum)
|
||||||
|
@ -88,3 +91,51 @@ GLOBAL_ENTRY(ip_fast_csum)
|
||||||
mov b0=r34
|
mov b0=r34
|
||||||
br.ret.sptk.many b0
|
br.ret.sptk.many b0
|
||||||
END(ip_fast_csum)
|
END(ip_fast_csum)
|
||||||
|
|
||||||
|
/*
 * csum_ipv6_magic: checksum of the two IPv6 addresses plus len, proto and
 * an initial checksum, folded to 16 bits and complemented.
 *
 * Inputs:
 *	in0: address of the source address (four 32-bit words are read)
 *	in1: address of the destination address (four 32-bit words are read)
 *	in2: len (32-bit value)
 *	in3: proto (only the low 16 bits are used)
 *	in4: initial checksum (32-bit value)
 *
 * Returns:
 *	r8: complement of the 16-bit folded sum (upper 48 bits are zero)
 *
 * Modifies in0, in1 (post-incremented), in2, in4 (zero-extended in place)
 * and the scratch registers r9-r11, r15-r27.
 *
 * in2 and in4 carry 32-bit C arguments whose upper 32 bits are not
 * guaranteed to be zero on entry, so both are zero-extended before they
 * take part in 64-bit arithmetic.  Without this, stray upper bits of in4
 * would be folded into the result.
 */
GLOBAL_ENTRY(csum_ipv6_magic)
	ld4	r20=[in0],4		// first word of each address
	ld4	r21=[in1],4
	zxt4	in2=in2			// clear undefined upper half of len
	;;
	ld4	r22=[in0],4
	ld4	r23=[in1],4
	dep	r15=in3,in2,32,16	// r15 = len in bits 0-31, proto in bits 32-47
	;;
	ld4	r24=[in0],4
	ld4	r25=[in1],4
	mux1	r15=r15,@rev		// reverse all eight bytes of r15
	add	r16=r20,r21
	add	r17=r22,r23
	zxt4	in4=in4			// clear undefined upper half of csum
	;;
	ld4	r26=[in0],4
	ld4	r27=[in1],4
	shr.u	r15=r15,16		// byte-swapped proto in bits 0-15, byte-swapped len above it
	add	r18=r24,r25
	add	r8=r16,r17
	;;
	add	r19=r26,r27
	add	r8=r8,r18
	;;
	add	r8=r8,r19		// r8 = sum of all eight address words
	add	r15=r15,in4
	;;
	add	r8=r8,r15		// 64-bit total; every term is < 2^48, so no carry out
	;;
	shr.u	r10=r8,32	// now fold sum into short
	zxt4	r11=r8
	;;
	add	r8=r10,r11
	;;
	shr.u	r10=r8,16	// yeah, keep it rolling
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	;;
	shr.u	r10=r8,16	// three times lucky
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	mov	r9=0xffff
	;;
	andcm	r8=r9,r8		// r8 = 0xffff & ~r8: complement within 16 bits
	br.ret.sptk.many b0
END(csum_ipv6_magic)
|
||||||
|
|
|
@ -70,4 +70,10 @@ static inline __sum16 csum_fold(__wsum csum)
|
||||||
return (__force __sum16)~sum;
|
return (__force __sum16)~sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define _HAVE_ARCH_IPV6_CSUM 1
|
||||||
|
struct in6_addr;
|
||||||
|
/*
 * csum_ipv6_magic - checksum over two IPv6 addresses, len, proto and csum.
 *
 * Implemented in assembly.  The routine reads four 32-bit words from each
 * of saddr and daddr, adds them together with len, proto and the initial
 * value csum, folds the total down to 16 bits and returns the complement
 * of that 16-bit value.
 */
extern unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
|
||||||
|
struct in6_addr *daddr, __u32 len, unsigned short proto,
|
||||||
|
unsigned int csum);
|
||||||
|
|
||||||
#endif /* _ASM_IA64_CHECKSUM_H */
|
#endif /* _ASM_IA64_CHECKSUM_H */
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue