Linux kernel 2.4.5 IPv4 socket 层的一点解释
1. 新建 socket 的函数原型:
static int inet_create(struct socket *sock, int protocol)
该函数定义在 net/ipv4/af_inet.c 中。
详细解释
static int inet_create(struct socket *sock, int protocol)
{
struct sock *sk;
struct proto *prot;
sock->state = SS_UNCONNECTED;
/* 设置状态为未连接 */
sk = sk_alloc(PF_INET, GFP_KERNEL, 1); /* 申请sock所需的内存 */
/* net/core/sock.c */
if (sk == NULL)
goto do_oom;
switch (sock->type) {
case SOCK_STREAM:
/* TCP协议 */
if (protocol && protocol != IPPROTO_TCP)
goto free_and_noproto;
protocol = IPPROTO_TCP;
prot = &tcp_prot;
/* tcp_prot定义在net/ipv4/tcp_ipv4.c */
sock->ops = &inet_stream_ops; /* 针对STREAM的socket操作 */
break;
case SOCK_SEQPACKET:
/* 不支持 */
goto free_and_badtype;
case SOCK_DGRAM:
/* UDP协议 */
if (protocol && protocol != IPPROTO_UDP)
goto free_and_noproto;
protocol = IPPROTO_UDP;
sk->no_check = UDP_CSUM_DEFAULT;
prot=&udp_prot;
/* udp_prot定义在net/ipv4/udp.c */
sock->ops = &inet_dgram_ops; /* 针对DGRAM的socket操作 */
break;
case SOCK_RAW:
/* RAW */
if (!capable(CAP_NET_RAW)) /* 判断是否有权利建立SOCK_RAW */
goto free_and_badperm;
if (!protocol)
/* protocol不能为0 */
goto free_and_noproto;
prot = &raw_prot;
/* raw_prot定义在net/ipv4/raw.c */
sk->reuse = 1;
/* 允许地址重用 */
sk->num = protocol;
sock->ops = &inet_dgram_ops; /* RAW的一些特性和DGRAM相同 */
if (protocol == IPPROTO_RAW)
sk->protinfo.af_inet.hdrincl = 1;
/* 允许自己定制ip头 */
break;
default:
goto free_and_badtype;
}
if (ipv4_config.no_pmtu_disc)
sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_DONT;
else
sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_WANT;
sk->protinfo.af_inet.id = 0;
sock_init_data(sock,sk);
/* 初始化一些数据 */
/* net/core/sock.c */
sk->destruct = inet_sock_destruct; /* 当销毁socket时调用inet_sock_destruct */
sk->zapped = 0;
sk->family = PF_INET;
sk->protocol = protocol;
sk->prot = prot;
sk->backlog_rcv = prot->backlog_rcv; /* prot->backlog_rcv()见各个类型的定义 */
sk->protinfo.af_inet.ttl = sysctl_ip_default_ttl; /* 设置默认ttl */
/* 修改/proc/sys/net/ipv4/ip_default_ttl */
sk->protinfo.af_inet.mc_loop = 1;
sk->protinfo.af_inet.mc_ttl = 1;
sk->protinfo.af_inet.mc_index = 0;
sk->protinfo.af_inet.mc_list = NULL;
#ifdef INET_REFCNT_DEBUG
atomic_inc(&inet_sock_nr);
#endif
if (sk->num) {
/* It assumes that any protocol which allows
* the user to assign a number at socket
* creation time automatically
* shares.
*/
sk->sport = htons(sk->num); /* 设置本地端口 */
/* Add to protocol hash chains. */
sk->prot->hash(sk);
}
if (sk->prot->init) {
int err = sk->prot->init(sk); /* 协议对socket的初始化 */
if (err != 0) {
inet_sock_release(sk);
return(err);
}
}
return(0);
free_and_badtype:
sk_free(sk);
/* 释放内存 */
return -ESOCKTNOSUPPORT;
free_and_badperm:
sk_free(sk);
return -EPERM;
free_and_noproto:
sk_free(sk);
return -EPROTONOSUPPORT;
do_oom:
return -ENOBUFS;
}
sock_init_data() 定义在 net/core/sock.c 中:
/*
 * sock_init_data - put a struct sock into its generic initial state.
 * @sock: socket that owns @sk, or NULL when there is none
 * @sk:   sock to initialise
 *
 * Initialises the receive, write and error queues and the timer, loads the
 * default buffer sizes, links @sk with @sock (when given), installs the
 * default callbacks and sets the reference count to 1.
 */
void sock_init_data(struct socket *sock, struct sock *sk)
{
	/* Three queues: receive, write and error. */
	skb_queue_head_init(&sk->receive_queue);
	skb_queue_head_init(&sk->write_queue);
	skb_queue_head_init(&sk->error_queue);

	init_timer(&sk->timer);

	sk->allocation = GFP_KERNEL;
	sk->rcvbuf = sysctl_rmem_default;
	sk->sndbuf = sysctl_wmem_default;
	sk->state = TCP_CLOSE;
	sk->zapped = 1;
	sk->socket = sock;

	if (!sock) {
		sk->sleep = NULL;
	} else {
		/* Cross-link the sock and its socket. */
		sk->type = sock->type;
		sk->sleep = &sock->wait;
		sock->sk = sk;
	}

	sk->dst_lock = RW_LOCK_UNLOCKED;
	sk->callback_lock = RW_LOCK_UNLOCKED;

	/*
	 * Default callbacks; sock_def_wakeup(), sock_def_readable(),
	 * sock_def_write_space(), sock_def_error_report() and
	 * sock_def_destruct() are all in net/core/sock.c.
	 */
	sk->state_change = sock_def_wakeup;
	sk->data_ready = sock_def_readable;
	sk->write_space = sock_def_write_space;
	sk->error_report = sock_def_error_report;
	sk->destruct = sock_def_destruct;

	sk->peercred.pid = 0;
	sk->peercred.uid = -1;
	sk->peercred.gid = -1;

	sk->rcvlowat = 1;
	/* Receive and send timeouts. */
	sk->rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	sk->sndtimeo = MAX_SCHEDULE_TIMEOUT;

	atomic_set(&sk->refcnt, 1);
}
1.1 SOCK_STREAM 的初始化
tcp_v4_init_sock() 定义在 net/ipv4/tcp_ipv4.c 中:
static int tcp_v4_init_sock(struct sock *sk)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
skb_queue_head_init(&tp->out_of_order_queue);
tcp_init_xmit_timers(sk);
tcp_prequeue_init(tp);
tp->rto
= TCP_TIMEOUT_INIT;
tp->mdev = TCP_TIMEOUT_INIT;
/* So many TCP implementations out there (incorrectly) count the
* initial SYN frame in their delayed-ACK and congestion control
* algorithms that we must have the following bandaid to talk
* efficiently to them.
-DaveM
*/
tp->snd_cwnd = 2;
/* See draft-stevens-tcpca-spec-01 for discussion of the
* initialization of these values.
*/
tp->snd_ssthresh = 0x7fffffff; /* Infinity */
tp->snd_cwnd_clamp = ~0;
tp->mss_cache = 536;
tp->reordering = sysctl_tcp_reordering;
sk->state = TCP_CLOSE;
sk->write_space = tcp_write_space; /* tcp_write_space() 在net/ipv4/tcp.c */
sk->use_write_queue = 1;
sk->tp_pinfo.af_tcp.af_specific = &ipv4_specific;
/* ipv4_specific 在net/ipv4/tcp_ipv4.c */
sk->sndbuf = sysctl_tcp_wmem[1]; /* 设置发送和接收缓冲区大小 */
sk->rcvbuf = sysctl_tcp_rmem[1]; /* sysctl_tcp_* 在net/ipv4/tcp.c */
atomic_inc(&tcp_sockets_allocated); /* tcp_sockets_allocated是当前TCP socket的数量 */
<