From: Christian Iwainsky (Christian.M.Iwainsky_at_informatik.stud.uni-erlangen.de)
Date: Wed Nov 02 2005 - 07:54:06 PST
Hello, I am still looking into the problem with cr_restart. It still terminates with "cri_syscall(CR_OP_RSTRT_REAP): Invalid argument". The two instances of the program go through the code in >code.txt<; one instance has rank = 0 and the other one has rank = 1. After this code fragment the program is killed. Afterwards I tried to restart instance 1 from the checkpoint debug_liz_tcp_turn_done_0_1.chkpt, at which point I get the above-stated message: Invalid argument. I have appended the log file from the kernel messages log. The line ____________________________ corresponds to the start of the two instances of the program. The second line, "here it happenes _______________________________________", is the place where I restart the checkpoint. Maybe you can tell me what happens here. Greetings, Christian char fileName[1024]; DEBUG_ENTER(); int ret; dev_tcp_t *dev_tcp = LIZ_DEVICE_GET_PRIVATE(tcp, self, dev_tcp_t); assert(dev_tcp); /* * create server socket */ assert(dev_tcp->port >= 0); ret = create_server_socket(&dev_tcp->server_socket, dev_tcp->port); assert(ret == LIZ_OK); sprintf(fileName,"debug_liz_tcp_server_socket_%i.chkpt",getpid()); cr_request_file(fileName); /* * accept connections from other hosts */ int port = dev_tcp->port; if (dev_tcp->port == 0) { struct sockaddr_in sockaddr; socklen_t len = sizeof(struct sockaddr_in); ret = getsockname(dev_tcp->server_socket, (struct sockaddr *) &sockaddr, &len); assert(ret == 0); port = ntohs(sockaddr.sin_port); } dev_tcp->port = port; FD_SET(dev_tcp->server_socket, &dev_tcp->fdset); dev_tcp->maxfd = MAX(dev_tcp->maxfd, dev_tcp->server_socket + 1); dev_tcp->no_fds++; /* * set flags of server socket */ assert(dev_tcp->server_socket != -1); int flags; flags = fcntl(dev_tcp->server_socket, F_GETFL, 0); assert(flags != -1); // TODO: set correct socket options flags &= ~O_NONBLOCK; ret = fcntl(dev_tcp->server_socket, F_SETFL, flags); assert(ret == 0); liz_rank_t node; liz_rank_t turn; 
sprintf(fileName,"debug_liz_tcp_preconnect_%i.chkpt",getpid()); cr_request_file(fileName); for (turn = 0; turn < dev_tcp->no_connections; turn++) { if (turn == dev_tcp->rank) { /* * accept connections from all other nodes */ for (node = turn + 1; node < dev_tcp->no_connections; node++) { // *connection_t *c = &dev_tcp->connections[node]; // *if (!IS_CONNECTION_OPENED(c)) { // *fprintf(stderr, "accepting connection from rank " FMT_RANK()"... ", node); #ifdef DEBUG fprintf(stderr, "accepting connection... "); #endif int socket = accept(dev_tcp->server_socket, (struct sockaddr *) NULL, NULL); /* * read the rank of the node that issued the request to connect */ liz_rank_t rank; int ret = liz_read(socket, &rank, sizeof(liz_rank_t),&dev_tcp->cont); assert(ret == sizeof(liz_rank_t)); assert(rank < dev_tcp->no_connections); assert(rank >= 0); #ifdef DEBUG fprintf(stderr, "connected rank " FMT_INT()" (socket " FMT_INT()")\n", rank,socket); #endif connection_t *c = &dev_tcp->connections[rank]; if (socket >= 0) { c->state = CONNECTION_OPENED; c->socket = socket; #ifdef DEBUG c->rank = rank; #endif FD_SET(c->socket, &dev_tcp->fdset); dev_tcp->maxfd = MAX(dev_tcp->maxfd, c->socket + 1); dev_tcp->no_fds++; ret = internal_setsockopt(c); if (ret) return ret; } // *} } } else { /* * connect to all other nodes if not already connected */ for (node = 0; node <= turn; node++) { connection_t *c = &dev_tcp->connections[node]; if (dev_tcp->rank != node && !IS_CONNECTION_OPENED(c)) { #ifdef DEBUG fprintf(stderr, "opening connection to %s[" FMT_RANK()"]...", c->hostname, node); #endif /* * create a client socket connection to the remote host */ ret = create_client_socket(c, c->hostname, c->port); assert(ret == LIZ_OK); /* * now connect a local socket and the remote socket */ ret = internal_connect(c, node); assert(ret == 0); /* * send local rank to remote node */ #if DSM_CHECKPOINT int ret = liz_write(c->socket, &dev_tcp->rank, sizeof(liz_rank_t),0); #else int ret = liz_write(c->socket, 
&dev_tcp->rank, sizeof(liz_rank_t)); #endif assert(ret == sizeof(liz_rank_t)); c->state = CONNECTION_OPENED; #ifdef DEBUG c->rank = node; #endif FD_SET(c->socket, &dev_tcp->fdset); dev_tcp->maxfd = MAX(dev_tcp->maxfd, c->socket + 1); dev_tcp->no_fds++; } } } /* do checkpoint here */ sprintf(fileName,"debug_liz_tcp_turn_done_%i_%i.chkpt",turn,dev_tcp->rank); cr_request_file(fileName); } sprintf(fileName,"debug_liz_tcp_connections_up_%i.chkpt",getpid()); cr_request_file(fileName); #if CHECK_CONNECTIONS_EMPTY_AFTER_STARTUP /* * check the connections for debugging purposes */ check_connections(dev_tcp); #endif /* * start up receiver thread */ #ifdef TACO dev_tcp->thread = taco_thread_create((TTaco_Func) self->run_as_thread,self, NULL, NULL, NULL, NULL, 0, 0); #else liz_thread_create(&dev_tcp->thread, NULL, self->run_as_thread, self); #endif fprintf(stderr,"START_TCP_MODULE done\n"); DEBUG_LEAVE(); return LIZ_OK; } Nov 2 16:28:53 faui21l sichiwai: __________________________________________________________________________________________-- Nov 2 16:29:37 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7593: : cr_chkpt_reap returning -22 Nov 2 16:29:37 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7593: : process 7593 checkpointing its own process 7593 Nov 2 16:29:37 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7595: : Preparing to dump 5 threads Nov 2 16:29:37 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7595: : Writing the fs struct... Nov 2 16:29:37 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7595: : Writing the open file section... 
Nov 2 16:29:37 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7595: : ...files_struct Nov 2 16:29:37 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7595: : ...files Nov 2 16:29:37 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7593: : cr_chkpt_reap returning -11 Nov 2 16:29:37 faui21l kernel: cr_chkpt_done <cr_chkpt_req.c:893>, pid 7593: : cr_chkpt_done returning 1 Nov 2 16:29:37 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7593: : cr_chkpt_reap returning 0 Nov 2 16:29:37 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7593: : process 7593 checkpointing its own process 7593 Nov 2 16:29:37 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7596: : Preparing to dump 5 threads Nov 2 16:29:38 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7597: : Writing the fs struct... Nov 2 16:29:38 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7597: : Writing the open file section... Nov 2 16:29:38 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7597: : ...files_struct Nov 2 16:29:38 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7597: : ...files Nov 2 16:29:38 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7593: : cr_chkpt_reap returning 0 Nov 2 16:29:38 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7593: : process 7593 checkpointing its own process 7593 Nov 2 16:29:38 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7593: : Preparing to dump 5 threads Nov 2 16:29:38 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7597: : Writing the fs struct... Nov 2 16:29:38 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7597: : Writing the open file section... 
Nov 2 16:29:38 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7597: : ...files_struct Nov 2 16:29:38 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7597: : ...files Nov 2 16:29:38 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7593: : cr_chkpt_reap returning 0 Nov 2 16:29:38 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7593: : cr_chkpt_reap returning -22 Nov 2 16:29:39 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7593: : process 7593 checkpointing its own process 7593 Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7594: : Preparing to dump 5 threads Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7594: : Writing the fs struct... Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7594: : Writing the open file section... Nov 2 16:29:39 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7594: : ...files_struct Nov 2 16:29:39 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7594: : ...files Nov 2 16:29:39 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7593: : cr_chkpt_reap returning 0 Nov 2 16:29:39 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7593: : process 7593 checkpointing its own process 7593 Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7595: : Preparing to dump 5 threads Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7595: : Writing the fs struct... Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7595: : Writing the open file section... 
Nov 2 16:29:39 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7595: : ...files_struct Nov 2 16:29:39 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7595: : ...files Nov 2 16:29:39 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7593: : cr_chkpt_reap returning 0 Nov 2 16:29:39 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7593: : process 7593 checkpointing its own process 7593 Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7595: : Preparing to dump 5 threads Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7594: : Writing the fs struct... Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7594: : Writing the open file section... Nov 2 16:29:39 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7594: : ...files_struct Nov 2 16:29:39 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7594: : ...files Nov 2 16:29:39 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7593: : cr_chkpt_reap returning 0 Nov 2 16:29:39 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7593: : process 7593 checkpointing its own process 7593 Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7596: : Preparing to dump 5 threads Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7596: : Writing the fs struct... Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7596: : Writing the open file section... 
Nov 2 16:29:39 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7596: : ...files_struct Nov 2 16:29:39 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7596: : ...files Nov 2 16:29:39 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7593: : cr_chkpt_reap returning 0 Nov 2 16:29:39 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7593: : process 7593 checkpointing its own process 7593 Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7597: : Preparing to dump 5 threads Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7594: : Writing the fs struct... Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7594: : Writing the open file section... Nov 2 16:29:39 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7594: : ...files_struct Nov 2 16:29:39 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7594: : ...files Nov 2 16:29:39 faui21l kernel: Skipping a socket. Nov 2 16:29:39 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7593: : cr_chkpt_reap returning 0 Nov 2 16:29:39 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7593: : process 7593 checkpointing its own process 7593 Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7596: : Preparing to dump 5 threads Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7596: : Writing the fs struct... Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7596: : Writing the open file section... Nov 2 16:29:39 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7596: : ...files_struct Nov 2 16:29:39 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7596: : ...files Nov 2 16:29:39 faui21l kernel: Skipping a socket. 
Nov 2 16:29:39 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7593: : cr_chkpt_reap returning 0 Nov 2 16:29:39 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning -22 Nov 2 16:29:39 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7600: : process 7600 checkpointing its own process 7600 Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7602: : Preparing to dump 5 threads Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7601: : Writing the fs struct... Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7601: : Writing the open file section... Nov 2 16:29:39 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7601: : ...files_struct Nov 2 16:29:39 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7601: : ...files Nov 2 16:29:39 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning 0 Nov 2 16:29:39 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7600: : process 7600 checkpointing its own process 7600 Nov 2 16:29:39 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7601: : Preparing to dump 5 threads Nov 2 16:29:40 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7601: : Writing the fs struct... Nov 2 16:29:40 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7601: : Writing the open file section... 
Nov 2 16:29:40 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7601: : ...files_struct Nov 2 16:29:40 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7601: : ...files Nov 2 16:29:40 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning 0 Nov 2 16:29:40 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7600: : process 7600 checkpointing its own process 7600 Nov 2 16:29:40 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7600: : Preparing to dump 5 threads Nov 2 16:29:40 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7604: : Writing the fs struct... Nov 2 16:29:40 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7604: : Writing the open file section... Nov 2 16:29:40 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7604: : ...files_struct Nov 2 16:29:40 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7604: : ...files Nov 2 16:29:40 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning -11 Nov 2 16:29:40 faui21l kernel: cr_chkpt_done <cr_chkpt_req.c:893>, pid 7600: : cr_chkpt_done returning 1 Nov 2 16:29:40 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning 0 Nov 2 16:29:40 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7600: : process 7600 checkpointing its own process 7600 Nov 2 16:29:40 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7602: : Preparing to dump 5 threads Nov 2 16:29:40 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7602: : Writing the fs struct... Nov 2 16:29:40 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7602: : Writing the open file section... 
Nov 2 16:29:40 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7602: : ...files_struct Nov 2 16:29:40 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7602: : ...files Nov 2 16:29:40 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning -11 Nov 2 16:29:40 faui21l kernel: cr_chkpt_done <cr_chkpt_req.c:893>, pid 7600: : cr_chkpt_done returning 1 Nov 2 16:29:40 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning 0 Nov 2 16:29:40 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7600: : process 7600 checkpointing its own process 7600 Nov 2 16:29:40 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7604: : Preparing to dump 5 threads Nov 2 16:29:40 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7601: : Writing the fs struct... Nov 2 16:29:40 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7601: : Writing the open file section... Nov 2 16:29:40 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7601: : ...files_struct Nov 2 16:29:40 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7601: : ...files Nov 2 16:29:40 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning 0 Nov 2 16:29:40 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning -22 Nov 2 16:29:41 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7600: : process 7600 checkpointing its own process 7600 Nov 2 16:29:41 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7601: : Preparing to dump 5 threads Nov 2 16:29:41 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7603: : Writing the fs struct... Nov 2 16:29:41 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7603: : Writing the open file section... 
Nov 2 16:29:41 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7603: : ...files_struct Nov 2 16:29:41 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7603: : ...files Nov 2 16:29:41 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning 0 Nov 2 16:29:41 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7600: : process 7600 checkpointing its own process 7600 Nov 2 16:29:41 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7603: : Preparing to dump 5 threads Nov 2 16:29:41 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7600: : Writing the fs struct... Nov 2 16:29:41 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7600: : Writing the open file section... Nov 2 16:29:41 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7600: : ...files_struct Nov 2 16:29:41 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7600: : ...files Nov 2 16:29:41 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning 0 Nov 2 16:29:41 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning -22 Nov 2 16:29:41 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7600: : process 7600 checkpointing its own process 7600 Nov 2 16:29:41 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7604: : Preparing to dump 5 threads Nov 2 16:29:41 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7604: : Writing the fs struct... Nov 2 16:29:41 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7604: : Writing the open file section... Nov 2 16:29:41 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7604: : ...files_struct Nov 2 16:29:41 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7604: : ...files Nov 2 16:29:41 faui21l kernel: Skipping a socket. 
Nov 2 16:29:42 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning 0 Nov 2 16:29:42 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning -22 Nov 2 16:29:42 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7600: : process 7600 checkpointing its own process 7600 Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7603: : Preparing to dump 5 threads Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7602: : Writing the fs struct... Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7602: : Writing the open file section... Nov 2 16:29:42 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7602: : ...files_struct Nov 2 16:29:42 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7602: : ...files Nov 2 16:29:42 faui21l kernel: Skipping a socket. Nov 2 16:29:42 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning 0 Nov 2 16:29:42 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7593: : cr_chkpt_reap returning -22 Nov 2 16:29:42 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7600: : process 7600 checkpointing its own process 7600 Nov 2 16:29:42 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7593: : process 7593 checkpointing its own process 7593 Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7602: : Preparing to dump 5 threads Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7594: : Preparing to dump 5 threads Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7602: : Writing the fs struct... Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7602: : Writing the open file section... 
Nov 2 16:29:42 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7602: : ...files_struct Nov 2 16:29:42 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7602: : ...files Nov 2 16:29:42 faui21l kernel: Skipping a socket. Nov 2 16:29:42 faui21l kernel: Skipping a socket. Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7595: : Writing the fs struct... Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7595: : Writing the open file section... Nov 2 16:29:42 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7595: : ...files_struct Nov 2 16:29:42 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7595: : ...files Nov 2 16:29:42 faui21l kernel: Skipping a socket. Nov 2 16:29:42 faui21l kernel: Skipping a socket. Nov 2 16:29:42 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning 0 Nov 2 16:29:42 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7600: : process 7600 checkpointing its own process 7600 Nov 2 16:29:42 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7593: : cr_chkpt_reap returning 0 Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7602: : Preparing to dump 5 threads Nov 2 16:29:42 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7593: : cr_chkpt_reap returning -22 Nov 2 16:29:42 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7593: : process 7593 checkpointing its own process 7593 Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7595: : Preparing to dump 5 threads Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7602: : Writing the fs struct... Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7602: : Writing the open file section... 
Nov 2 16:29:42 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7602: : ...files_struct Nov 2 16:29:42 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7602: : ...files Nov 2 16:29:42 faui21l kernel: Skipping a socket. Nov 2 16:29:42 faui21l kernel: Skipping a socket. Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7597: : Writing the fs struct... Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7597: : Writing the open file section... Nov 2 16:29:42 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7597: : ...files_struct Nov 2 16:29:42 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7597: : ...files Nov 2 16:29:42 faui21l kernel: Skipping a socket. Nov 2 16:29:42 faui21l kernel: Skipping a socket. Nov 2 16:29:42 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning 0 Nov 2 16:29:42 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7593: : cr_chkpt_reap returning 0 Nov 2 16:29:42 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7593: : process 7593 checkpointing its own process 7593 Nov 2 16:29:42 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7600: : cr_chkpt_reap returning -22 Nov 2 16:29:42 faui21l kernel: cr_chkpt_req <cr_chkpt_req.c:634>, pid 7600: : process 7600 checkpointing its own process 7600 Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7596: : Preparing to dump 5 threads Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1044>, pid 7602: : Preparing to dump 5 threads Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7594: : Writing the fs struct... Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7594: : Writing the open file section... 
Nov 2 16:29:42 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7594: : ...files_struct Nov 2 16:29:42 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7594: : ...files Nov 2 16:29:42 faui21l kernel: Skipping a socket. Nov 2 16:29:42 faui21l kernel: Skipping a socket. Nov 2 16:29:42 faui21l kernel: cr_chkpt_reap <cr_chkpt_req.c:935>, pid 7593: : cr_chkpt_reap returning 0 Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1116>, pid 7601: : Writing the fs struct... Nov 2 16:29:42 faui21l kernel: cr_do_vmadump <cr_dump_self.c:1125>, pid 7601: : Writing the open file section... Nov 2 16:29:42 faui21l kernel: cr_save_all_files <cr_dump_self.c:862>, pid 7601: : ...files_struct Nov 2 16:29:42 faui21l kernel: cr_save_all_files <cr_dump_self.c:869>, pid 7601: : ...files Nov 2 16:29:42 faui21l kernel: Skipping a socket. Nov 2 16:29:42 faui21l kernel: Skipping a socket. Nov 2 16:30:28 faui21l sichiwai: here it happenes _______________________________________ Nov 2 16:30:32 faui21l kernel: cr_rstrt_request_restart <cr_rstrt_req.c:678>, pid 7634: : cr_magic = 67 82, cr_version = 2, checkpoint_type = 1, num_threads = 5 Nov 2 16:30:32 faui21l kernel: cr_reserve_ids <cr_rstrt_req.c:448>, pid 7634: : Now reserving required ids... 
Nov 2 16:30:32 faui21l kernel: cr_rstrt_clones <cr_rstrt_req.c:3198>, pid 7635: : 7635: Have enough processes Nov 2 16:30:32 faui21l kernel: cr_rstrt_child <cr_rstrt_req.c:3262>, pid 7635: : 7635: Restoring credentials Nov 2 16:30:32 faui21l kernel: cr_rstrt_clones <cr_rstrt_req.c:3198>, pid 7636: : 7636: Have enough processes Nov 2 16:30:32 faui21l kernel: cr_rstrt_clones <cr_rstrt_req.c:3198>, pid 7637: : 7637: Have enough processes Nov 2 16:30:32 faui21l kernel: cr_rstrt_clones <cr_rstrt_req.c:3198>, pid 7638: : 7638: Have enough processes Nov 2 16:30:32 faui21l kernel: cr_rstrt_clones <cr_rstrt_req.c:3198>, pid 7639: : 7639: Have enough processes Nov 2 16:30:32 faui21l kernel: vmadump: mmap failed: /var/run/nscd/db5bHKnB (deleted) Nov 2 16:30:32 faui21l kernel: thaw_threads returned error, aborting. -2 Nov 2 16:30:32 faui21l kernel: vmadump: invalid signature Nov 2 16:30:32 faui21l kernel: thaw_threads returned error, aborting. -22 Nov 2 16:30:32 faui21l kernel: vmadump: invalid signature Nov 2 16:30:32 faui21l kernel: thaw_threads returned error, aborting. -22 Nov 2 16:30:32 faui21l kernel: vmadump: invalid signature Nov 2 16:30:32 faui21l kernel: thaw_threads returned error, aborting. -22 Nov 2 16:30:32 faui21l kernel: vmadump: invalid signature Nov 2 16:30:32 faui21l kernel: thaw_threads returned error, aborting. -22