Ancillary Data. Passing descriptors and credentials.
On Unix systems, besides INET domain sockets, you can also have UNIX domain sockets.
A Unix domain communication is limited to the local system. That is, you can't use Unix domain sockets
to communicate across a network. But, unlike INET communications, a Unix domain message may have
some sort of ancillary data attached to it. At this moment these can be:
- Credentials
- Opened descriptors
To open a Unix domain communication you first have to create a socket, both in the client and in the server.
The server will call bind in order to associate the socket with a file name so it can
be accessed by the clients. Then it calls listen and enters an accept loop.
The client will use connect with the server socket name in order to establish a connection
with the server.
Server side socket creation:
/*
 * Create a listening Unix domain stream socket bound to `sockname`.
 *
 * sockname  filesystem path of the socket; an existing file at that path
 *           is unlinked first.
 * mode      permission bits applied to the socket file with chmod().
 * recvcred  not used by this function (kept so existing callers still compile).
 *
 * Returns the listening descriptor on success, or a negated errno value:
 * -EINVAL for a NULL or empty name, -ENAMETOOLONG when the name does not
 * fit into sun_path, otherwise the errno of the failing system call.
 * On failure no descriptor is left open.
 */
int create_unix_socket(const char *sockname, int mode, int recvcred)
{
    struct sockaddr_un serv_addr;
    size_t namelen;
    int sockfd;
    int err;

    (void)recvcred;

    /* Validate the name before touching any resource: sun_path is a small
     * fixed-size array and an unchecked copy would overflow it. */
    if (sockname == NULL || sockname[0] == '\0')
        return -EINVAL;
    namelen = strlen(sockname);
    if (namelen >= sizeof(serv_addr.sun_path))
        return -ENAMETOOLONG;

    sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sockfd < 0)
        return -errno;

    /* Remove any previous file with this name; the result is deliberately
     * ignored because the file usually does not exist. */
    unlink(sockname);

    memset(&serv_addr, 0, sizeof(serv_addr));
    serv_addr.sun_family = AF_UNIX;
    memcpy(serv_addr.sun_path, sockname, namelen + 1);

    /* The structure is zero-filled and the path NUL-terminated, so the
     * whole structure can be passed as the address. */
    if (bind(sockfd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0)
        goto fail;

    /* A socket file with the wrong permissions is reported, not ignored. */
    if (chmod(sockname, (mode_t)mode) < 0)
        goto fail;

    if (listen(sockfd, 5) < 0)
        goto fail;

    return sockfd;

fail:
    /* Save errno first: close() may overwrite it. */
    err = errno;
    close(sockfd);
    return -err;
}
Client side connection opening:
/*
 * Open a Unix domain stream socket and connect it to the server socket
 * named `sockname`. After connecting, SO_PASSCRED is enabled on the socket
 * so that the peer credentials can be received with incoming messages.
 *
 * Returns the connected descriptor on success, or a negated errno value:
 * -EINVAL for a NULL or empty name, -ENAMETOOLONG when the name does not
 * fit into sun_path, otherwise the errno of the failing system call.
 * On failure no descriptor is left open.
 */
int open_connected_socket(const char *sockname)
{
    struct sockaddr_un serv_addr;
    size_t namelen;
    int passcred = 1;
    int sockfd;
    int err;

    /* Reject names that would overflow the fixed-size sun_path array. */
    if (sockname == NULL || sockname[0] == '\0')
        return -EINVAL;
    namelen = strlen(sockname);
    if (namelen >= sizeof(serv_addr.sun_path))
        return -ENAMETOOLONG;

    memset(&serv_addr, 0, sizeof(serv_addr));
    serv_addr.sun_family = AF_UNIX;
    memcpy(serv_addr.sun_path, sockname, namelen + 1);

    sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sockfd < 0)
        return -errno;

    if (connect(sockfd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0)
        goto fail;

    /* Set the option to receive the peer credentials */
    if (setsockopt(sockfd, SOL_SOCKET, SO_PASSCRED, &passcred, sizeof(passcred)) < 0)
        goto fail;

    return sockfd;

fail:
    /* Save errno first: close() may overwrite it. */
    err = errno;
    close(sockfd);
    return -err;
}
Ref: Unix manpages unix(7) socket(2) bind(2) connect(2)
For sending/receiving the messages, the functions sendmsg/recvmsg are to be used.
These functions are message oriented, and they can scatter/gather the data in a similar
manner to the readv/writev functions. What is important to understand about scatter/gather
is that the entity is the message itself; it is not fragmented into the pieces specified
in the iovec. What I mean is that even if you send 3 chunks of data of 10, 20 and 5 bytes each,
if the receiver sets up 3 buffers of 100 bytes each you are not going to get the data split
into the buffers as you sent it; all of it will be accumulated into the first buffer.
That is, recvmsg will put data into the first buffer until it fills it, then the remaining
data (if any) will be placed into the second buffer and so on. The return value of recvmsg will
tell you the total amount of data received. The iov_len values in the iovec will not be touched
in any way. That is, (unfortunately) we can't just use the iovec in order to avoid parsing the data.
In order to attach ancillary data to a message, you have to pack it into a buffer pointed to by
the msg_control member of the msghdr structure. For manipulating ancillary data you should always
use the CMSG_* macros, in order to keep your code portable.
Ref: Unix manpages recvmsg(2) sendmsg(2), cmsg(3)
Sending messages having attached ancillary data:
/*
 * Send one message with sendmsg(), optionally attaching ancillary data.
 *
 * sock      socket to send on.
 * vec       array of data buffers to gather; must not be NULL.
 * vec_size  number of entries in `vec`; must be at least 1.
 * desc      descriptors to pass as SCM_RIGHTS, or NULL for none.
 * nr_desc   number of entries in `desc` (0..MAX_NR_OF_DESCRIPTORS);
 *           treated as 0 when `desc` is NULL.
 * cred      credentials to pass as SCM_CREDENTIALS, or NULL for none.
 *
 * Returns the value returned by sendmsg() on success, -EINVAL for invalid
 * arguments, or the negated errno of sendmsg() on failure.
 */
int send_message(int sock, const struct iovec *vec, int vec_size, const int *desc, int nr_desc,
                 const struct ucred *cred)
{
    /* The union gives the control buffer the alignment of struct cmsghdr,
     * which a plain char array does not guarantee. */
    union {
        char buf[CMSG_SPACE(sizeof(int) * MAX_NR_OF_DESCRIPTORS) +
                 CMSG_SPACE(sizeof(struct ucred))];
        struct cmsghdr align;
    } control;
    struct msghdr msg = {0};
    struct cmsghdr *cmsg;
    size_t used = 0;
    ssize_t sent;

    if (vec == NULL || vec_size < 1)
        return -EINVAL;
    if (nr_desc < 0 || nr_desc > MAX_NR_OF_DESCRIPTORS)
        return -EINVAL;
    if (desc == NULL)
        nr_desc = 0;

    /* sendmsg() does not modify the buffers; the cast only drops const
     * because msghdr has no const-qualified iov member. */
    msg.msg_iov = (struct iovec *)vec;
    msg.msg_iovlen = vec_size;

    /* sending ancillary data */
    if (cred != NULL || nr_desc > 0) {
        memset(&control, 0, sizeof(control));
        /* Expose the whole buffer while packing so CMSG_NXTHDR can step to
         * the second header; the real length is set once packing is done. */
        msg.msg_control = control.buf;
        msg.msg_controllen = sizeof(control.buf);

        cmsg = CMSG_FIRSTHDR(&msg);
        if (cmsg == NULL)
            return -EINVAL;

        /* packing credentials */
        if (cred != NULL) {
            cmsg->cmsg_level = SOL_SOCKET;
            cmsg->cmsg_type = SCM_CREDENTIALS;
            cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
            memcpy(CMSG_DATA(cmsg), cred, sizeof(struct ucred));
            used += CMSG_SPACE(sizeof(struct ucred));

            if (nr_desc > 0) {
                cmsg = CMSG_NXTHDR(&msg, cmsg);
                if (cmsg == NULL)
                    return -EINVAL;
            }
        }

        /* packing file descriptors */
        if (nr_desc > 0) {
            cmsg->cmsg_level = SOL_SOCKET;
            cmsg->cmsg_type = SCM_RIGHTS;
            cmsg->cmsg_len = CMSG_LEN(sizeof(int) * nr_desc);
            memcpy(CMSG_DATA(cmsg), desc, sizeof(int) * nr_desc);
            used += CMSG_SPACE(sizeof(int) * nr_desc);
        }

        /* Finished packing: the length counts each message with its
         * padding (CMSG_SPACE), not just header plus payload (CMSG_LEN),
         * otherwise the second message would be cut short. */
        msg.msg_controllen = used;
    }

    sent = sendmsg(sock, &msg, 0);
    if (sent < 0)
        return -errno;
    return (int)sent;
}
Receiving messages having attached ancillary data:
/*
 * Extract the file descriptors carried by one SCM_RIGHTS control message.
 *
 * cmsg     control message to read; anything other than a SOL_SOCKET /
 *          SCM_RIGHTS message yields 0.
 * msg      message header the control message belongs to.
 * fds      output array for the received descriptors (may be NULL).
 * maxdesc  capacity of `fds`.
 *
 * Up to `maxdesc` descriptors are stored into `fds`. Descriptors that do
 * not fit (or all of them when `fds` is NULL) are closed so they do not
 * leak into the receiving process.
 *
 * Returns the number of descriptors stored into `fds`.
 */
static int get_descriptors(struct cmsghdr *cmsg, struct msghdr *msg, int *fds, int maxdesc)
{
    const unsigned char *data;
    size_t payload;
    size_t count;
    size_t i;
    int stored = 0;

    if (cmsg == NULL || msg == NULL)
        return 0;
    if (cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS)
        return 0;
    if (msg->msg_controllen == 0 || cmsg->cmsg_len < CMSG_LEN(0))
        return 0;

    /* CMSG_LEN(0) is the header size including alignment padding, i.e.
     * the offset at which the payload starts. */
    payload = cmsg->cmsg_len - CMSG_LEN(0);
    count = payload / sizeof(int);
    data = CMSG_DATA(cmsg);

    for (i = 0; i < count; i++) {
        int fd;

        /* memcpy avoids relying on the payload being int-aligned. */
        memcpy(&fd, data + i * sizeof(int), sizeof(fd));
        if (fds != NULL && stored < maxdesc)
            fds[stored++] = fd;
        else
            close(fd);
    }

    return stored;
}
/*
 * Copy the sender credentials out of one SCM_CREDENTIALS control message.
 *
 * cmsg  control message to read; must be a SOL_SOCKET / SCM_CREDENTIALS
 *       message large enough to hold a struct ucred.
 * msg   message header the control message belongs to (only checked for NULL).
 * cred  destination for the credentials.
 *
 * Returns the cmsg_len of the control message on success, or -EINVAL when
 * an argument is NULL, the message is of another kind, or it is too short
 * to contain a complete struct ucred.
 */
static int get_credentials(struct cmsghdr *cmsg, struct msghdr *msg, struct ucred *cred)
{
    if (cmsg == NULL || msg == NULL || cred == NULL)
        return -EINVAL;
    if (cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_CREDENTIALS)
        return -EINVAL;

    /* Never copy more than the control message actually carries. */
    if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ucred)))
        return -EINVAL;

    memcpy(cred, CMSG_DATA(cmsg), sizeof(struct ucred));
    return (int)cmsg->cmsg_len;
}
/*
 * Receive one message with recvmsg() and unpack its ancillary data.
 *
 * sock          socket to receive from.
 * vec           array of buffers the data is scattered into; must not be NULL.
 * vec_max_size  number of entries in `vec`; must be at least 1.
 * vec_size      out: set to msg_iovlen after the call (must not be NULL).
 *               NOTE(review): recvmsg() is not documented to change
 *               msg_iovlen, so this presumably always equals vec_max_size;
 *               use the return value to learn how much data arrived.
 * desc          out: array receiving passed descriptors, or NULL.
 * max_nr_desc   capacity of `desc`.
 * nr_desc       out: number of descriptors stored into `desc`; set to 0
 *               when none were received. May be NULL.
 * cred          out: sender credentials if present in the message. May be NULL.
 *
 * Descriptors that arrive but do not fit into `desc` (or all of them when
 * `desc` is NULL) are handed to get_descriptors() with no remaining
 * capacity, which closes them instead of leaking them.
 *
 * Returns the value returned by recvmsg() on success, -EINVAL for invalid
 * arguments, or the negated errno of recvmsg() on failure.
 */
int recv_message(int sock, struct iovec *vec, int vec_max_size, int *vec_size,
                 int *desc, int max_nr_desc, int *nr_desc,
                 struct ucred *cred)
{
    /* The union gives the control buffer the alignment of struct cmsghdr. */
    union {
        char buf[CMSG_SPACE(sizeof(int) * MAX_NR_OF_DESCRIPTORS) +
                 CMSG_SPACE(sizeof(struct ucred))];
        struct cmsghdr align;
    } control;
    struct msghdr msg = {0};
    struct cmsghdr *cmsg;
    int capacity;
    int total_fds = 0;
    ssize_t ret;

    if (vec == NULL || vec_max_size < 1 || vec_size == NULL)
        return -EINVAL;

    /* Honour the caller's array size, not the compile-time maximum. */
    capacity = (desc != NULL && max_nr_desc > 0) ? max_nr_desc : 0;

    /* Give the output a defined value on every path. */
    if (nr_desc != NULL)
        *nr_desc = 0;

    memset(&control, 0, sizeof(control));
    msg.msg_iov = vec;
    msg.msg_iovlen = vec_max_size;
    msg.msg_control = control.buf;
    msg.msg_controllen = sizeof(control.buf);

    ret = recvmsg(sock, &msg, 0);
    if (ret < 0)
        return -errno;

    *vec_size = (int)msg.msg_iovlen;

    /* get the ANCILLARY data */
    for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
        if (cmsg->cmsg_level != SOL_SOCKET)
            continue;

        switch (cmsg->cmsg_type) {
        case SCM_RIGHTS:
            /* Append after descriptors from earlier SCM_RIGHTS messages;
             * with no space left the helper closes what it is given. */
            total_fds += get_descriptors(cmsg, &msg,
                                         capacity > 0 ? desc + total_fds : NULL,
                                         capacity - total_fds);
            break;
        case SCM_CREDENTIALS:
            if (cred != NULL)
                get_credentials(cmsg, &msg, cred);
            break;
        default:
            break;
        }
    }

    if (nr_desc != NULL)
        *nr_desc = total_fds;

    return (int)ret;
}
The credentials, defined by the struct ucred are the pid, the uid and gid of the sending process.
In order to be able to receive the credentials, the receiving socket should call:
int passcred=1;
setsockopt(sockfd,SOL_SOCKET,SO_PASSCRED,(void *)&passcred,sizeof(passcred));
In the case of a server, it has to call this piece of code for every accepted connection.
Calling it on the listening socket won't help at all :-)
If the receiving of credentials is enabled, the receiver of a message will get them even if
the sender does not explicitly send the credentials. A process that wants to send the credentials
by itself is required to send its correct pid. For the uid it can choose to send either the real uid
or the effective uid. The same is valid for the gid, which can be the real gid or the effective gid.
These restrictions, however, don't apply to processes running as root. Usually
root can "lie" about whatever it wants. On the Unix versions that have the POSIX capabilities,
the following capabilities are required to be able to lie about a particular aspect:
CAP_SYS_ADMIN -- to fake the pid, CAP_SETUID -- to fake the uid, CAP_SETGID -- to fake the gid.
Due to the possibility of sending fake credentials, a system that holds information which
even root should not get is required to enforce the verification of credentials by
other means. (see the chapter: Authentication of a trusted program).
Passing descriptors is the only way in which 2 unrelated programs can send each other "rights" on the filesystem
or on other protected resources. The sending process has to "pack" them into the control buffer
of the message. The receiver should have enough ancillary buffer available in order to receive them.
A Unix process has a limit on the number of open file descriptors it can have,
so the receiving process should have enough free descriptor slots to receive the sent fds.
If not, it will receive only some of them. If a file was opened (by the sender) for reading and writing,
then after receiving it the receiver process can read/write it, even if it runs with the privileges
of a user who has no access to it.
In this subchapter I will focus on 2 applications unrelated to system security. The implications for system security will be
the subject of study of the next subchapter.
Balancing execution
Let's imagine a web server which forks for every new connection, allowing each child to handle the connection request.
One way to improve the performance is to fork in advance a number of child processes which keep a Unix domain connection
with the parent. When a new connection is accepted, the parent just passes the newly opened socket to an available child to be served.
This approach saves lots of fork() system calls (which consume a nontrivial amount of processor power), thus improving the performance
of the web server.
Persistent connection with a database
Let's imagine we have a web server running CGI programs which have to display data resulting from a database query.
Since each time a CGI program is started it has to establish a connection with the database server (possibly a TCP/IP
connection with a db server located on another machine), the performance is poor.
One way to speed things up is to use a persistent database connection. So, we have a connection server, which is responsible for
establishing a number of database connections. When a CGI starts, it requests a connection from the connection server, which gives it one from
its set of already open connections. When the CGI finishes, it sends the connection back to the server, so it can be reused by another CGI when required.
This may result in a spectacular increase in speed.
Avoiding root privileges for a listening server
Let's assume a server that has to perform tasks with the rights of several different users.
The standard solution for this problem is to have the server listening with root privileges
(since root is the only one able to start processes for another user). When a new connection is made, the server
forks a new child which changes its uid based on the login username; then the new child handles the request.
This approach is dangerous, since if an attacker succeeds in triggering a buffer overflow in the parent to start /bin/bash,
the attacker gets root privileges, so your system is compromised.
An alternative solution may be the following:
The server starts with root privileges. It creates a security token (let's say a 256-bit random number),
then forks a child process connected to the parent by a Unix domain socket.
The child sets its uid to noname (or another uid having no rights on the system), then the child opens the TCP/IP port and
listens for new connections. When an incoming connection arrives, it authenticates the user and, if the login is successful,
it sends a message to the parent containing the newly established connection, the uid and the security token.
Then the parent forks the process which handles the connection. This child changes its uid, erases the value of the
inherited security token (since it doesn't need it anymore), then goes to work.
In case an attacker performs a buffer overflow which execs /bin/bash, this shell will have no privileges,
and the exec system call will erase the memory of the listening process, so the security token will not be available
to the intruder.
Controlled access to protected resources
Many programs running on a standard Unix system have to be setuid (or setgid) root in order to be able to
access special resources. One example is the program ping. This program needs to open a raw socket.
Raw sockets are available only with root privileges (on systems without POSIX capabilities). This is a security risk.
These programs have a rich set of command line options and display formats, so the probability of a buffer overflow
is pretty high.
This risk may be greatly reduced if ping ran with the current user's privileges and requested the raw sockets
it needs from a program which implements a very limited (paranoid :-) protocol, where there is not much room for exploits.
Demo
The demo downloadable from this page implement a server and a client to demonstrate the
Controlled access to protected resources. After unpacking and compiling
gunzip ancillary.tgz
tar -xvf ancillary.tar
make
You can log in as root on a console and type make protect, then start the server ./server_demo.
The make protect step will change the permissions of protected.file so that nobody other than root
will be able to read it. Then, in another console, start ./client_demo and you will see
that this program can read the unreadable file.
Of course, for real-life applications the client has to be authenticated by the server
and checked to see whether it is authorized to get the file. But this is the subject of the chapter:
Authentication of a trusted program
This demo is written in plain C only so that it is easily understandable by many people.
Back to the Advanced Unix programming techniques page
Sys++ Project Home page
Visit M.T.M. Home Page