Ancillary Data. Passing descriptors and credentials.



On Unix systems, besides INET domain sockets you can also have Unix domain sockets. Unix domain communication is limited to the local system; that is, you can't use Unix domain sockets to communicate across a network. But, unlike INET communication, a Unix domain message may have some ancillary data attached to it. At the moment this can be the credentials of the sending process or open file descriptors. To open a Unix domain communication you first have to create a socket, in both the client and the server, by calling socket(AF_UNIX, SOCK_STREAM, 0).
The server calls bind to associate the socket with a file name so that it can be reached by the clients. Then it calls listen and enters an accept loop.
The client uses connect with the server's socket name to establish a connection with the server.
Server side socket creation:

/*
 * Create a listening Unix domain stream socket bound to the path `sockname`.
 *
 *   sockname  filesystem path of the socket. Any existing file with this
 *             name is removed before binding.
 *   mode      permission bits applied to the socket file with chmod().
 *   recvcred  not used by this function; kept so existing callers still
 *             compile.
 *
 * Returns the listening descriptor on success, or a negative errno value:
 * -EINVAL for a NULL or empty name, -ENAMETOOLONG when the name does not fit
 * into sun_path, otherwise the negated errno of the failing system call.
 * The socket is closed again on every failure path.
 */
int create_unix_socket(const char *sockname,int mode, int recvcred){
	struct sockaddr_un serv_addr;
	size_t namelen;
	int sockfd;
	int err;

	(void)recvcred;

	if(sockname==NULL) return -EINVAL;
	namelen=strlen(sockname);
	if(namelen==0) return -EINVAL;
	/* sun_path is a fixed-size array: refuse names that would overflow it */
	if(namelen>=sizeof(serv_addr.sun_path)) return -ENAMETOOLONG;

	if((sockfd=socket(AF_UNIX,SOCK_STREAM,0))<0)
		return -errno;

	/* Remove a stale socket file; a missing file is not an error here */
	unlink(sockname);

	memset(&serv_addr,0,sizeof(serv_addr));
	serv_addr.sun_family=AF_UNIX;
	memcpy(serv_addr.sun_path,sockname,namelen+1);

	if(bind(sockfd,(struct sockaddr *)&serv_addr,sizeof(serv_addr))<0) goto fail;
	if(chmod(sockname,mode)<0) goto fail;
	if(listen(sockfd,5)<0) goto fail;
	return sockfd;

fail:
	/* save errno first: close() may overwrite it */
	err=errno;
	close(sockfd);
	return -err;
}

       
Client side connection opening:

/*
 * Open a Unix domain stream socket and connect it to the server socket
 * bound at the path `sockname`.
 *
 * After connecting, SO_PASSCRED is enabled on the socket so that the peer
 * credentials can be received. That step is best effort: its result does not
 * affect the return value.
 *
 * Returns the connected descriptor on success, or a negative errno value:
 * -EINVAL for a NULL or empty name, -ENAMETOOLONG when the name does not fit
 * into sun_path, otherwise the negated errno of socket() or connect().
 * The socket is closed again when connect() fails.
 */
int open_connected_socket(const char *sockname){
	struct sockaddr_un serv_addr;
	size_t namelen;
	int sockfd;
	int passcred=1;
	int err;

	if(sockname==NULL) return -EINVAL;
	namelen=strlen(sockname);
	if(namelen==0) return -EINVAL;
	/* sun_path is a fixed-size array: refuse names that would overflow it */
	if(namelen>=sizeof(serv_addr.sun_path)) return -ENAMETOOLONG;

	memset(&serv_addr,0,sizeof(serv_addr));
	serv_addr.sun_family=AF_UNIX;
	memcpy(serv_addr.sun_path,sockname,namelen+1);

	if((sockfd=socket(AF_UNIX,SOCK_STREAM,0))<0) return -errno;

	if(connect(sockfd,(struct sockaddr *)&serv_addr,sizeof(serv_addr))<0){
		/* save errno first: close() may overwrite it */
		err=errno;
		close(sockfd);
		return -err;
	}

	/* Set the option to receive the peer credentials (best effort) */
	(void)setsockopt(sockfd,SOL_SOCKET,SO_PASSCRED,&passcred,sizeof(passcred));

	return sockfd;
}

        

Ref: Unix manpages unix(7) socket(2) bind(2) connect(2)

Sending and receiving messages

For sending and receiving messages, the functions sendmsg/recvmsg are to be used. These functions are message oriented, and they can scatter/gather the data in a similar manner to the readv/writev functions. What is important to understand about scatter/gather is that the entity is the message itself; it is not fragmented into the pieces specified in the iovec. What I mean is that even if you send 3 chunks of data of 10, 20 and 5 bytes each, if the receiver sets up 3 buffers of 100 bytes each you are not going to get the data split into the buffers the way you sent it; all of it will be accumulated into the first buffer. That is, recvmsg will put data into the first buffer until it fills it, then the remaining data (if any) will go into the second buffer, and so on. The return value of recvmsg tells you the total amount of data received. The iov_len values in the iovec will not be touched in any way. That is, (unfortunately) we can't just use the iovec in order to avoid parsing the data.
In order to attach ancillary data to a message, you have to pack it into a buffer pointed to by the msg_control member of the msghdr structure. For manipulating ancillary data you should always use the CMSG_* macros, in order to keep your code portable.

Ref: Unix manpages recvmsg(2) sendmsg(2), cmsg(3)

Sending messages having attached ancillary data:
/*
 * Send one message with sendmsg(), optionally attaching ancillary data.
 *
 *   sock      socket to send on.
 *   vec       array of vec_size buffers holding the message data.
 *   desc      array of nr_desc descriptors to pass as SCM_RIGHTS; when NULL,
 *             no descriptors are sent regardless of nr_desc.
 *   nr_desc   number of descriptors, 0..MAX_NR_OF_DESCRIPTORS.
 *   cred      credentials to pass as SCM_CREDENTIALS, or NULL for none.
 *
 * Returns the value returned by sendmsg() on success, -EINVAL for invalid
 * arguments, or the negated errno of sendmsg() on failure.
 */
int send_message(int sock, const struct iovec *vec, int vec_size, const int *desc, int nr_desc,
		const struct ucred *cred){
	/*
	 * The control buffer lives at function scope because sendmsg() reads it
	 * after the packing block has ended. The union gives it the alignment
	 * struct cmsghdr needs.
	 */
	union {
		char buf[CMSG_SPACE(sizeof(int)*MAX_NR_OF_DESCRIPTORS)+CMSG_SPACE(sizeof(struct ucred))];
		struct cmsghdr align;
	} ctrl;
	struct msghdr msg={0};
	struct cmsghdr *cmsg=NULL;
	size_t ctrl_used=0;
	ssize_t sndret;

	if(vec==NULL || vec_size<1) return -EINVAL;
	if(nr_desc<0 || nr_desc>MAX_NR_OF_DESCRIPTORS) return -EINVAL;
	if(desc==NULL) nr_desc=0;

	/* msg_iov is not const-qualified; sendmsg() only reads the buffers */
	msg.msg_iov=(struct iovec *)vec;
	msg.msg_iovlen=vec_size;

/* sending ancillary data */
	if( cred != NULL || nr_desc > 0){
		memset(&ctrl,0,sizeof(ctrl));
		msg.msg_control=ctrl.buf;
		msg.msg_controllen=sizeof(ctrl.buf);
		cmsg=CMSG_FIRSTHDR(&msg);
		if(cmsg==NULL) return -EINVAL;

	/* packing credentials */
		if(cred != NULL){
			cmsg->cmsg_level=SOL_SOCKET;
			cmsg->cmsg_type=SCM_CREDENTIALS;
			cmsg->cmsg_len=CMSG_LEN(sizeof(struct ucred));
			memcpy(CMSG_DATA(cmsg),cred,sizeof(struct ucred));
			ctrl_used+=CMSG_SPACE(sizeof(struct ucred));
			cmsg=CMSG_NXTHDR(&msg,cmsg);
		}

	/* packing file descriptors */
		if(nr_desc>0){
			if(cmsg==NULL) return -EINVAL;
			cmsg->cmsg_level=SOL_SOCKET;
			cmsg->cmsg_type=SCM_RIGHTS;
			cmsg->cmsg_len=CMSG_LEN(sizeof(int)*nr_desc);
			memcpy(CMSG_DATA(cmsg),desc,sizeof(int)*nr_desc);
			ctrl_used+=CMSG_SPACE(sizeof(int)*nr_desc);
		}

		/*
		 * Finished packing: report only the space actually used. Each entry
		 * occupies CMSG_SPACE (header + payload + padding), not CMSG_LEN,
		 * because the next header starts on an aligned boundary.
		 */
		msg.msg_controllen=ctrl_used;
	}

	sndret=sendmsg(sock,&msg,0);
	if(sndret<0) return -errno;
	return (int)sndret;
}

       
Receiving messages having attached ancillary data:

/*
 * Extract the file descriptors carried by an SCM_RIGHTS control message.
 *
 *   cmsg     control message to read; anything other than a SOL_SOCKET /
 *            SCM_RIGHTS message yields 0.
 *   msg      message header the control message belongs to.
 *   fds      output array; may be NULL, in which case nothing is stored.
 *   maxdesc  capacity of fds.
 *
 * Up to maxdesc descriptors are stored into fds in the order received.
 * Descriptors that do not fit are closed so that they are not leaked into
 * this process.
 *
 * Returns the number of descriptors stored into fds.
 */
static int get_descriptors(struct cmsghdr *cmsg,struct msghdr *msg,int *fds, int maxdesc){
	const unsigned char *data;
	size_t payload;
	int nrdesc;
	int kept=0;
	int i;

	if(cmsg==NULL || msg==NULL) return 0;
	if(cmsg->cmsg_level!=SOL_SOCKET || cmsg->cmsg_type!=SCM_RIGHTS) return 0;
	if(msg->msg_controllen==0) return 0;
	/* a length shorter than the header itself carries no payload */
	if(cmsg->cmsg_len<CMSG_LEN(0)) return 0;

	payload=cmsg->cmsg_len-CMSG_LEN(0);
	nrdesc=(int)(payload/sizeof(int));
	data=CMSG_DATA(cmsg);
	if(fds==NULL) maxdesc=0;

	for(i=0;i<nrdesc;i++){
		int fd;

		/* memcpy: the payload is not guaranteed to be aligned for int */
		memcpy(&fd,data+(size_t)i*sizeof(int),sizeof(fd));
		if(kept<maxdesc) fds[kept++]=fd;
		else close(fd);
	}
	return kept;
}

/*
 * Copy the sender credentials out of an SCM_CREDENTIALS control message.
 *
 *   cmsg  control message to read.
 *   msg   message header the control message belongs to (only checked for
 *         NULL).
 *   cred  destination for the credentials.
 *
 * Returns cmsg->cmsg_len on success. Returns -EINVAL when an argument is
 * NULL, when the message is not a SOL_SOCKET / SCM_CREDENTIALS message, or
 * when its length is too short to hold a struct ucred; cred is left
 * untouched in those cases.
 */
static int get_credentials(struct cmsghdr *cmsg,struct msghdr *msg, struct ucred *cred){
	if(cmsg==NULL || msg==NULL || cred==NULL) return -EINVAL;
	if(cmsg->cmsg_level!=SOL_SOCKET || cmsg->cmsg_type!=SCM_CREDENTIALS) return -EINVAL;
	/* never read more payload than the control message claims to carry */
	if(cmsg->cmsg_len<CMSG_LEN(sizeof(struct ucred))) return -EINVAL;

	memcpy(cred,CMSG_DATA(cmsg),sizeof(struct ucred));
	return (int)cmsg->cmsg_len;
}


/*
 * Receive one message with recvmsg() and unpack its ancillary data.
 *
 *   sock          socket to receive from.
 *   vec           array of vec_max_size buffers that receive the data.
 *   vec_size      out: set to msg_iovlen as it is after recvmsg().
 *                 NOTE(review): recvmsg() is not documented to change
 *                 msg_iovlen, so this presumably always equals vec_max_size
 *                 -- confirm what callers expect here.
 *   desc          out: array of max_nr_desc entries for received
 *                 descriptors; may be NULL.
 *   max_nr_desc   capacity of desc.
 *   nr_desc       out (optional): number of descriptors stored into desc.
 *   cred          out (optional): credentials of the sender, filled in when
 *                 an SCM_CREDENTIALS control message is present.
 *
 * Descriptors that do not fit into desc (all of them when desc is NULL) are
 * handed to get_descriptors() with no remaining capacity, so that they are
 * not kept open unnoticed.
 *
 * Returns the value returned by recvmsg() on success, -EINVAL for invalid
 * arguments, or the negated errno of recvmsg() on failure.
 */
int recv_message(int sock, struct iovec *vec, int vec_max_size, int *vec_size,
					int  *desc, int max_nr_desc, int *nr_desc,
					struct ucred *cred){

	/* the union gives the control buffer the alignment struct cmsghdr needs */
	union {
		char buf[CMSG_SPACE(sizeof(int)*MAX_NR_OF_DESCRIPTORS)+CMSG_SPACE(sizeof(struct ucred))];
		struct cmsghdr align;
	} ctrl;
	struct msghdr msg={0};
	struct cmsghdr *cmsg=NULL;
	ssize_t ret;
	int total_fds=0;

	if(vec==NULL || vec_max_size<1 || vec_size==NULL)  return -EINVAL;
	/* without an output array there is no room for any descriptor */
	if(desc==NULL || max_nr_desc<0) max_nr_desc=0;

	memset(&ctrl,0,sizeof(ctrl));
	msg.msg_iov=vec;
	msg.msg_iovlen=vec_max_size;
	msg.msg_control=ctrl.buf;
	msg.msg_controllen=sizeof(ctrl.buf);

	ret=recvmsg(sock,&msg,0);
	if(ret<0) return -errno;
	*vec_size=(int)msg.msg_iovlen;

/* get the ANCILLARY data */
	for(cmsg=CMSG_FIRSTHDR(&msg);cmsg!=NULL;cmsg=CMSG_NXTHDR(&msg,cmsg)){
		if(cmsg->cmsg_level!=SOL_SOCKET) continue;

		switch(cmsg->cmsg_type){
		case SCM_RIGHTS:
			/* append after what earlier control messages delivered */
			total_fds+=get_descriptors(cmsg,&msg,
					desc!=NULL ? desc+total_fds : NULL,
					max_nr_desc-total_fds);
			break;
		case SCM_CREDENTIALS:
			if(cred!=NULL)
				get_credentials(cmsg,&msg,cred);
			break;
		default:
			break;
		}
	}

	/* always report a count, also when no SCM_RIGHTS message arrived */
	if(nr_desc!=NULL) *nr_desc=total_fds;
	return (int)ret;
}
       

How to receive credentials

The credentials, defined by the struct ucred are the pid, the uid and gid of the sending process. In order to be able to receive the credentials, the receiving socket should call:
       	int passcred=1;
        setsockopt(sockfd,SOL_SOCKET,SO_PASSCRED,(void *)&passcred,sizeof(passcred));
        
In the case of a server, it has to call this piece of code for every accepted connection. Calling it on the listening socket won't help at all :-) If receiving credentials is enabled, the receiver of a message will get them even if the sender does not explicitly send them. A process that wants to send the credentials itself is required to send its correct pid. For the uid it can choose to send either the real uid or the effective uid. The same is valid for the gid, which can be the real gid or the effective gid. These restrictions, however, do not apply to processes running as root; usually root can "lie" about whatever it wants. On Unix versions that have POSIX capabilities, the following capabilities are required to be able to lie about a particular aspect: CAP_SYS_ADMIN -- to fake the pid, CAP_SETUID -- to fake the uid, CAP_SETGID -- to fake the gid.
Due to the possibility of sending fake credentials, a system that holds information which even root should not get is required to enforce credential verification by other means (see the chapter: Authentication of a trusted program).

How to send opened descriptors

This is the only way in which 2 unrelated programs can send each other "rights" on the filesystem or on other protected resources. The sending process has to "pack" the descriptors into the control buffer of the message. The receiver should have a large enough ancillary buffer available in order to receive them. A Unix process has a limit on the number of open file descriptors it can have, so the receiving process should have enough descriptor space to receive the sent fds. If not, it will receive only some of them. If a file was opened (by the sender) for reading and writing, then after receiving it the receiver process can read from and write to it, even if it runs with the privileges of a user who has no access to it.

Applications

In this subchapter I will focus on 2 applications unrelated to system security. The implications for system security will be the subject of the next subchapter.

Balancing execution

Let's imagine a web server that forks for every new connection, letting each child handle the connection request. One way to improve the performance is to fork in advance a number of child processes which keep a Unix domain connection with the parent. When a new connection is accepted, the parent just passes the newly opened socket to an available child to be served. This approach saves lots of fork() system calls (which consume a nontrivial amount of processor power), thus improving the performance of the web server.

Persistent connection with a database

Let's imagine we have a web server running CGI programs that have to display data resulting from a database query. Since each time a CGI program is started it has to establish a connection with the database server (possibly a TCP/IP connection with a db server located on another machine), the performance is poor. One way to speed things up is to use a persistent database connection. So, we have a connection server, which is responsible for establishing a number of database connections. When a CGI starts, it requests a connection from the connection server, which gives it one from its set of already open connections. When the CGI finishes, it sends the connection back to the server, so it can be reused by another CGI when required. This may result in a spectacular increase in speed.

Security applications

Avoiding root privileges for a listening server

Let's assume a server that has to perform tasks with the rights of a couple of different users. The standard solution for this problem is to have the server listening with root privileges (since root is the only one able to start processes for another user). When a new connection is made, the server forks a new child, which changes its uid based on the login username; then the new child handles the request. This approach is dangerous: if a hacker succeeds in triggering a buffer overflow in the parent to start /bin/bash, he gets root privileges, so your system is screwed.
An alternative solution may be the following:
The server starts with root privileges. It creates a security token (let's say a 256-bit random number), then forks a child process connected with the parent by a Unix domain socket. The child sets its uid to noname (or another uid having no rights on the system), then opens the TCP/IP port and listens for new connections. When an incoming connection arrives, it authenticates the user and, if the login is successful, it sends a message to the parent containing the newly established connection, the uid and the security token. Then the parent forks the process that handles the connection. This child changes its uid, erases the value of the inherited security token (since it doesn't need it anymore), then goes to work. In case a hacker performs a buffer overflow that execs /bin/bash, this shell will have no privileges, and the exec system call will erase the memory of the listening process, so the security token will not be available to the intruder.

Controlled access to protected resources

Many programs running on a standard Unix system have to be setuid (or setgid) root in order to be able to access special resources. One example is the program ping. This program needs to open a raw socket. Raw sockets are available only with root privileges (on systems without POSIX capabilities). This is a security risk. These programs have a rich set of command line options and display formats, so the probability of a buffer overflow is pretty high.
This risk may be greatly reduced if ping ran with the current user's privileges and requested the raw sockets it needs from a program that implements a very limited (paranoid :-) protocol, where there is not much room for exploits.

Demo

The demo downloadable from this page implements a server and a client to demonstrate controlled access to protected resources. Unpack and compile it with:
        gunzip ancillary.tgz
        tar -xvf ancillary.tar
        make
       
You can log in as root in a console and type make protect, then start the server with ./server_demo. The make protect step changes the permissions of protected.file so that nobody other than root is able to read it. Then, in another console, start ./client_demo and you will see that this program can read the unreadable file.
Of course, for real-life applications the client has to be authenticated by the server, which must check whether it is authorized to get the file. But this is the subject of the chapter: Authentication of a trusted program.
This demo is written in plain C only, so that it is easily understandable by many people.


Back to the Advanced Unix programming techniques page

Sys++ Project Home page

Visit M.T.M. Home Page