/* phylo.c --- phylogenetic tree edit server Jim Blandy --- July 1994 */ #include #include #include #include #include #include /* for getpid */ #include "xmalloc.h" #include "hash.h" #include "lenstring.h" #include "node.h" #include "rdp_format.h" #include "getline.h" #ifdef __GNUC__ #define INLINE inline #else #define INLINE #endif void request_loop (void); void init_request_table (void); typedef void request_t(int ARGC, struct lenstring *ARGV); #define LOOKUP_REQUEST(table, string, string_len) \ ((request_t **) lookup_hash_table ((table), (string), (string_len))) #define LOOKUP_REQUEST_SOFT(table, string, string_len) \ ((request_t **) lookup_hash_table_soft ((table), (string), (string_len))) struct string_hash *requests; struct node *tree = 0; struct string_hash *tree_by_name = 0; /* Update this number whenever you make a change that the lisp code must be changed to co-operate with. */ #define PROTOCOL_VERSION "7" int main () { init_request_table (); request_loop (); return 0; } request_t check_version_request; request_t use_tree_request; request_t select_request; request_t select_range_request; request_t open_request; request_t open_all_request; request_t open_groups_request; request_t list_selected_request; request_t unselect_all_request; request_t open_selected_request; request_t open_subtree_request; /*&&& request decls */ void init_request_table () { requests = new_hash_table (); #define REQUEST(name, func) \ (*LOOKUP_REQUEST (requests, (name), sizeof (name) - 1) = (func)) REQUEST ("check-version", check_version_request); REQUEST ("use-tree", use_tree_request); REQUEST ("select", select_request); REQUEST ("select-range", select_range_request); REQUEST ("open", open_request); REQUEST ("open-all", open_all_request); REQUEST ("open-groups", open_groups_request); REQUEST ("list-selected", list_selected_request); REQUEST ("unselect-all", unselect_all_request); REQUEST ("open-selected", open_selected_request); REQUEST ("open-subtree", open_subtree_request); /*&&& request init */ #undef REQUEST } void request_loop () { for (;;) { int len; struct lenstring *words; char *words_text; /* Since we're probably talking to a pipe, stdout will be block-buffered. We have to explicitly request that the output from the last reply be sent. */ fflush (stdout); /* Read a request. */ len = getstrings (stdin, &words, &words_text); if (len == EOF) break; if (len > 0) { request_t **r = LOOKUP_REQUEST_SOFT (requests, words[0].text, words[0].len); if (r) (**r) (len, words); else printf ("(phylo-error \"unknown request `%s'\")\n", words[0].text); } free (words); free (words_text); } } /* Extensible buffers, with text properties */ struct textprop { size_t start, end; /* position for text properties */ char *plist; /* the plist, as a C string */ }; struct buf { char *text; /* the text in the buffer */ size_t len; /* the length of the text */ size_t size; /* the number of bytes pointed to by text */ struct textprop *props; size_t props_len; size_t props_size; }; /* Initialize BUF to represent the null string, and allocate some space to it. */ void buf_init (struct buf *buf) { buf->len = 0; buf->size = 5; /* to test out the buffer-growing code */ buf->text = (char *) xmalloc (sizeof (*buf->text) * buf->size); buf->props_len = 0; buf->props_size = 5; buf->props = (struct textprop *) xmalloc (sizeof (*buf->props) * buf->props_size); } /* Free the storage used by BUF. The caller must take care of freeing the plist strings; they'll usually be constants anyway, so this shouldn't be a big deal. */ void buf_free (struct buf *buf) { free (buf->text); free (buf->props); } /* Make sure that buf has at least SIZE chars allocated to it. */ void buf_assure (struct buf *buf, int size) { if (size >= buf->size) { /* Try doubling, but snap it to SIZE if that isn't enough. */ buf->size *= 2; if (size >= buf->size) buf->size = size; buf->text = (char *) xrealloc (buf->text, sizeof (*buf->text) * buf->size); } } /* Add N copies of the character C to the end of BUF. */ void buf_fill (struct buf *buf, size_t n, int c) { buf_assure (buf, buf->len + n); memset (buf->text + buf->len, c, n); buf->len += n; } /* Copy STRING to the end of BUF. */ void buf_str (struct buf *buf, char *string) { size_t len = strlen (string); buf_assure (buf, buf->len + len); memcpy (buf->text + buf->len, string, len); buf->len += len; } /* Copy LENSTRING to the end of BUF. */ void buf_lenstring (struct buf *buf, struct lenstring *lenstring) { buf_assure (buf, buf->len + lenstring->len); memcpy (buf->text + buf->len, lenstring->text, lenstring->len); buf->len += lenstring->len; } /* Put text properties PLIST on BUF, from START to END. */ void buf_putprop (struct buf *buf, size_t start, size_t end, char *plist) { if (buf->props_len >= buf->props_size) { buf->props_size *= 2; buf->props = (struct textprop *) xrealloc (buf->props, (sizeof (struct textprop) * buf->props_size)); } { struct textprop *p = &buf->props[buf->props_len++]; p->start = start; p->end = end; p->plist = plist; } } /* Write out BUF on STREAM as an Emacs Lisp string with text properties. */ void buf_display (struct buf *buf, FILE *stream) { if (buf->props_len == 0) write_quoted (stream, buf->text, buf->len); else { fputs ("#(", stream); write_quoted (stream, buf->text, buf->len); { int i; for (i = 0; i < buf->props_len; i++) { fprintf (stream, " %d %d %s", buf->props[i].start, buf->props[i].end, buf->props[i].plist); } } putc (')', stream); } } /* Formatting nodes, trees, and ranges. */ #define INDENTATION(node) (3 * (node)->depth) void format (struct node *node, struct buf *b) { int selected_p = (node->marks & SELECTED_M); int leaf_p = (node->num_children == 0); int open_p = (node->marks & OPEN_M); int children_selected_p = (node->marks & CHILDREN_SELECTED_M); buf_fill (b, INDENTATION (node), ' '); { size_t label_start = b->len; buf_lenstring (b, &node->name); if (leaf_p) buf_fill (b, ((node->name.len < 12) ? (13 - (int) node->name.len) : 1), ' '); else if (node->description.len > 0) buf_str (b, " "); buf_lenstring (b, &node->description); buf_putprop (b, label_start, b->len, (selected_p ? "(face phylo-selected mouse-face phylo-mouse-selected)" : "(mouse-face phylo-mouse-unselected)")); } if (leaf_p) buf_str (b, "\n"); else { size_t ellipsis_start = b->len; if (open_p) buf_str (b, " >>>\n"); else buf_str (b, " ...\n"); buf_putprop (b, ellipsis_start + 1, b->len - 1, (children_selected_p ? "(face phylo-selected mouse-face phylo-mouse-selected)" : "(mouse-face phylo-mouse-unselected)")); /* If the whole node is selected, we should highlight the space before the ellipsis, too, to make a solid block of selection. */ if (selected_p && children_selected_p) buf_putprop (b, ellipsis_start, ellipsis_start + 1, "(face phylo-selected)"); } } void format_tree (struct node *tree, struct buf *out) { int i; format (tree, out); if (tree->marks & OPEN_M) for (i = 0; i < tree->num_children; i++) format_tree (tree->children[i], out); } void format_range (struct node *first, struct node *last, struct buf *out) { for (;;) { if (first->marks & VISIBLE_M) format (first, out); if (first == last) break; first = first->next; assert (first); } } /* Redisplaying the tree. */ /* Here's our redisplay strategy. Each node structure has a member called "marks", containing flags like SELECTED_M and OPEN_M. It also has a member called "prev_marks", which holds the value of "marks" the last time the display was brought up-to-date. The tree manipulation routines select, unselect, open, and close nodes by fiddling with "marks". The redisplay routines make a pass over the tree, compare "marks" with "prev_marks", send the replies necessary to update the display, and then copy each node's "marks" into its "prev_marks". Given this info, how do we decide what replies to send to redraw the tree? Let T be the set of nodes in the tree. Let U be the subset of T containing those nodes whose display has not been changed by the last request --- i.e. their visibility and selectedness are the same, and whether or not they have any selected children hasn't changed. When the redisplay is done, the nodes U are going to still be there on the screen; nodes above or below them may change, but their text is going to be unchanged. If you imagine all the nodes in T laid out in display order, U divides T up into stretches of changed nodes. (If U is empty, all of T is one big stretch. If U == T, there are zero stretches of changed nodes.) We walk T, noting the runs of changed nodes. For a given run R, we find the first and last nodes in R that were visible, and send a "phylo-display-range" to replace them with those nodes in R that are now visible. If a run has no previously visible nodes, well, that will require special treatment. However, that would correspond to a bunch of text being deleted or inserted, and the text above and below it remaining unchanged (because otherwise it would be part of the run). This never happens in the current phylo browser --- whenever you close a node, the parent's ellipsis changes. It also happens when the entire tree is new --- i.e. after the first use-tree request. This algorithm doesn't handle that case. */ /* Recompute the VISIBLE marks on TREE. Assume that all parents of NODE are open iff VISIBLE is non-zero. */ void update_visible (struct node *tree, int visible) { int i; if (visible) tree->marks |= VISIBLE_M; else tree->marks &= ~VISIBLE_M; visible = visible && (tree->marks & OPEN_M); for (i = 0; i < tree->num_children; i++) update_visible (tree->children[i], visible); } /* Update the interior SELECTED marks in TREE. Return non-zero iff all nodes in TREE are selected. */ int update_selected (struct node *tree) { if (tree->num_children > 0) { int all = 1; int i; for (i = 0; i < tree->num_children; i++) all = update_selected (tree->children[i]) && all; if (all) tree->marks |= SELECTED_M; else tree->marks &= ~SELECTED_M; return all; } else return tree->marks & SELECTED_M; } /* Update the CHILDREN_SELECTED marks in TREE. Return non_zero iff any nodes in TREE are selected. */ int update_children_selected (struct node *tree) { if (tree->num_children > 0) { int any = 0; int i; for (i = 0; i < tree->num_children; i++) any = update_children_selected (tree->children[i]) || any; if (any) tree->marks |= CHILDREN_SELECTED_M; else tree->marks &= ~CHILDREN_SELECTED_M; return any; } else return tree->marks & SELECTED_M; } /* Return non-zero iff NODE has changed since the last redisplay --- that is, iff the significant marks differ in marks and prev_marks. */ INLINE int node_changed_p (struct node *node) { const enum mark_masks significant = (OPEN_M | VISIBLE_M | SELECTED_M | CHILDREN_SELECTED_M); return ((node->marks & significant) != (node->prev_marks & significant)); } /* Print, as an Emacs Lisp string, the visible nodes that fall between START and END. Send the output to the file OUT. */ void display_range (struct node *start, struct node *end, FILE *out) { struct buf buf; buf_init (&buf); format_range (start, end, &buf); buf_display (&buf, out); buf_free (&buf); } /* Print, as an Emacs Lisp string, the visible nodes in the tree. */ void display_tree (struct node *tree, FILE *out) { struct buf buf; buf_init (&buf); format_tree (tree, &buf); buf_display (&buf, out); buf_free (&buf); } /* Redisplay the run from START to END. */ void redisplay_run (struct node *start, struct node *end) { /* The first and last previously visible nodes in START .. END. We could compute these in scan_and_redisplay, but it's only a constant time difference. */ struct node *first_visible, *last_visible; struct node *n; first_visible = 0; n = start; for (;;) { if (n->prev_marks & VISIBLE_M) { if (! first_visible) first_visible = n; last_visible = n; } if (n == end) break; n = n->next; assert (n); } /* I think there should always be at least one visible node in any changed run. */ assert (first_visible); printf ("(phylo-display-range "); write_quoted_lenstring (stdout, &first_visible->name); putchar (' '); write_quoted_lenstring (stdout, &last_visible->name); putchar (' '); display_range (start, end, stdout); puts (")"); } /* Walk the tree in display order, noting the changed runs, and redisplaying them. This assumes that all the marks are accurate. */ void scan_and_redisplay () { struct node *n; struct node *run_start, *run_end; run_start = 0; for (n = tree; n; n = n->next) if ((n->marks & VISIBLE_M) | (n->prev_marks & VISIBLE_M)) if (node_changed_p (n)) { if (! run_start) run_start = n; /* Start a new run. */ run_end = n; } else { if ((n->marks & VISIBLE_M) && run_start) /* We've finished a run. Redisplay it. */ { redisplay_run (run_start, run_end); run_start = 0; } } if (run_start) redisplay_run (run_start, run_end); } /* For each node in TREE, copy marks to prev_marks, to flag the tree as updated. */ void mark_as_accurate (struct node *tree) { int i; tree->prev_marks = tree->marks; for (i = 0; i < tree->num_children; i++) mark_as_accurate (tree->children[i]); } /* Update the display. See the notes at the top of the page. This handles any number of openings, closings, selectings, child-selectings, etcetera. It sends the proper replies to reflect those changes that have occurred since the last call to redisplay. */ void redisplay () { update_visible (tree, 1); update_selected (tree); update_children_selected (tree); scan_and_redisplay (); mark_as_accurate (tree); } /* A special case --- redisplay the whole tree, by erasing and redrawing. */ void redisplay_all () { update_visible (tree, 1); update_selected (tree); update_children_selected (tree); printf ("(phylo-display-all "); display_tree (tree, stdout); printf (")\n"); mark_as_accurate (tree); } /* Helper functions for the request functions. */ void report_syserr () { const char *message = strerror (errno); printf ("(phylo-error "); write_quoted (stdout, message, strlen (message)); puts (")"); } /* Make sure that the user has selected a tree to use. */ int check_tree () { if (! tree) { printf ("(phylo-error \"no tree chosen yet\")\n"); return 0; } return 1; } #define CHECK_TREE do { if (! check_tree ()) return; } while (0) /* Make sure that the request was given N arguments; use REQUEST_NAME in the error message. */ int check_argc (int argc, int n, char *request_name) { if (argc != n) { printf ("(phylo-error \"wrong number of args to `%s' request\")\n", request_name); return 0; } return 1; } #define CHECK_ARGC(n, request_name) \ do { if (! check_argc ((argc), (n), (request_name))) return; } while (0) /* Look up a node by NAME, and set VAR to point to a pointer to it. */ struct node * check_node_name (struct lenstring *name) { struct node **node_ptr = LOOKUP_NODE_SOFT (tree_by_name, name->text, name->len); if (! node_ptr) { printf ("(phylo-error \"no node by that name\")\n"); return 0; } return *node_ptr; } #define CHECK_NODE_NAME(var, name) \ do { if (! ((var) = check_node_name (&(name)))) return; } while (0) /* Set marks on nodes in the subtree rooted at NODE. MASK is the set of marks to be affected; all other marks are left unchanged. MARKS specifies the values they should have. For example, to set the OPEN mark and clear the SELECTED mark, you could say tree_set (tree, OPEN_M | SELECTED_M, OPEN_M). Some of the hardware registers on the Commodore 64 worked this way. */ void tree_set (struct node *node, enum mark_masks mask, enum mark_masks marks) { int i; node->marks = (node->marks & ~mask) | marks; for (i = 0; i < node->num_children; i++) tree_set (node->children[i], mask, marks); } /* For all nodes in the subtree rooted at NODE, If the marks in TEST_MASK have the values given in TEST_MARKS, Set the marks in MASK to the values given in MARKS. The masks and marks work as with tree_set. */ void tree_set_if (struct node *node, enum mark_masks test_mask, enum mark_masks test_marks, enum mark_masks mask, enum mark_masks marks) { int i; if ((node->marks & test_mask) == test_marks) node->marks = (node->marks & ~mask) | marks; for (i = 0; i < node->num_children; i++) tree_set_if (node->children[i], test_mask, test_marks, mask, marks); } /* The check-version request. */ void check_version_request (int argc, struct lenstring *argv) { CHECK_ARGC (2, "check-version"); if (strcmp (PROTOCOL_VERSION, argv[1].text)) printf ("(phylo-error " "\"Mismatched client (version %s) and server (version %s)\")\n", argv[1].text, PROTOCOL_VERSION); else printf ("(phylo-okay)\n"); } /* The use-tree request. */ void free_node (struct node *node) { if (node->name.len > 0) free (node->name.text); if (node->description.len > 0) free (node->description.text); if (node->children) free (node->children); free (node); } int read_tree (char *filename) { /* Free the old tree. */ if (tree) traverse_post (tree, (node_function_t) free_node); tree = 0; if (tree_by_name) free_hash_table (tree_by_name); tree_by_name = 0; /* Try to read in the new tree. */ { struct node *new = read_rdp (filename); if (new == &rdp_syserror) { report_syserr (); return 0; } else if (new == &rdp_parseerror) { printf ("(phylo-error \"`%s' is a malformed RDP `.phylo' file\")\n", filename); return 0; } tree = new; tree_by_name = hash_node_names (tree); } return 1; } /* Remove leaves from TREE that lack the IN_FILE mark. Free subtrees that have no descendants. Return non-zero iff TREE has any descendants to keep it alive, and was not freed. */ int minimize_tree (struct node *tree) { { int i, j; for (i = j = 0; i < tree->num_children; i++) if (minimize_tree (tree->children[i])) { struct node *child = tree->children[i]; child->child_index = j; tree->children[j] = child; j++; } tree->num_children = j; } if (tree->num_children > 0 || tree->marks & IN_FILE_M) return 1; else { free_node (tree); return 0; } } int mark_organisms (char *filename) { FILE *in = fopen (filename, "r"); int all_found; struct lenstring missing; char *buf; int buf_len; if (! in) { report_syserr (); return 0; } /* Clear the IN_FILE mark on all nodes first. */ tree_set (tree, IN_FILE_M, 0); all_found = 1; missing.len = 0; missing.text = 0; while ((buf_len = getline (in, &buf)) != EOF) { struct node **node = LOOKUP_NODE_SOFT (tree_by_name, buf, buf_len); if (! node) { all_found = 0; if (missing.text) append_to_lenstring (&missing, ", ", 2); else append_to_lenstring (&missing, "warning: tree doesn't include ", 30 /*ick*/); append_to_lenstring (&missing, buf, buf_len); } else (*node)->marks |= IN_FILE_M; free (buf); } if (ferror (in)) { report_syserr (); if (missing.len) free (missing.text); fclose (in); return 0; } if (! all_found) { printf ("(phylo-message "); write_quoted_lenstring (stdout, &missing); printf (")\n"); free (missing.text); } if (minimize_tree (tree)) { /* Rebuild the hash table for the minimized tree. */ free_hash_table (tree_by_name); tree_by_name = hash_node_names (tree); } else { printf ("(phylo-message \"warning: " "no organisms appear in the tree\")\n"); free_hash_table (tree_by_name); tree = 0; tree_by_name = 0; } fclose (in); return 1; } /* Close those nodes of TREE that contain only leaves. */ void close_leaf_only (struct node *tree) { int i; tree->marks &= ~OPEN_M; for (i = 0; i < tree->num_children; i++) if (tree->children[i]->num_children != 0) { tree->marks |= OPEN_M; close_leaf_only (tree->children[i]); } } void use_tree_request (int argc, struct lenstring *argv) { CHECK_ARGC (3, "use-tree"); if (! read_tree (argv[1].text)) return; if (! mark_organisms (argv[2].text)) return; if (tree) { thread_tree (tree, 0); close_leaf_only (tree); redisplay_all (); } printf ("(phylo-okay)\n"); return; } /* The select request. */ void select_node (struct node *node) { /* Toggle the selectedness of node, and then select all the leaves under node iff node is selected. */ node->marks ^= SELECTED_M; if (node->marks & SELECTED_M) tree_set (node, SELECTED_M, SELECTED_M); else tree_set (node, SELECTED_M, 0); redisplay (); } void select_request (int argc, struct lenstring *argv) { struct node *node; CHECK_TREE; CHECK_ARGC (2, "select"); CHECK_NODE_NAME (node, argv[1]); select_node (node); printf ("(phylo-okay)\n"); } /* The select-range request. */ void select_range (struct node *start, struct node *end) { enum mark_masks selected_p = (start->marks & SELECTED_M) ? 0 : (SELECTED_M | CHILDREN_SELECTED_M); struct node *first, *last; struct node *node; if (! (node_visible_p (start) && node_visible_p (end))) { printf ("(phylo-error \"start- or end-point not visible\")\n"); return; } if (node_before_p (start, end)) first = start, last = end; else first = end, last = start; node = first; for (;;) { /* If node is an apparent leaf (a leaf or a closed internal node), select it according to selected_p. */ if (node->num_children == 0 || (node->marks & OPEN_M) == 0) tree_set (node, (SELECTED_M | CHILDREN_SELECTED_M), selected_p); if (node == last) break; node = next_visible_node (node); assert (node); } redisplay (); } void select_range_request (int argc, struct lenstring *argv) { struct node *start, *end; CHECK_TREE; CHECK_ARGC (3, "select-range"); CHECK_NODE_NAME (start, argv[1]); CHECK_NODE_NAME (end, argv[2]); if (start == end) select_node (start); else select_range (start, end); printf ("(phylo-okay)\n"); } /* The open request. */ void open_request (int argc, struct lenstring *argv) { struct node *node; CHECK_TREE; CHECK_ARGC (2, "open"); CHECK_NODE_NAME (node, argv[1]); node->marks ^= OPEN_M; redisplay (); printf ("(phylo-okay)\n"); } /* The open-all request. */ void open_all_request (int argc, struct lenstring *argv) { CHECK_TREE; CHECK_ARGC (1, "open-all"); tree_set (tree, OPEN_M, OPEN_M); redisplay (); printf ("(phylo-okay)\n"); } /* The open-groups request. */ void open_groups_request (int argc, struct lenstring *argv) { CHECK_TREE; CHECK_ARGC (1, "open-groups"); close_leaf_only (tree); redisplay (); printf ("(phylo-okay)\n"); } /* The list-selected request. */ void list_selected (struct node *tree, FILE *file) { if (tree->num_children) { int i; for (i = 0; i < tree->num_children; i++) list_selected (tree->children[i], file); } else if (tree->marks & SELECTED_M) { fwrite (tree->name.text, sizeof (tree->name.text[0]), tree->name.len, file); putc ('\n', file); } } void list_selected_request (int argc, struct lenstring *argv) { CHECK_TREE; CHECK_ARGC (2, "list-selected"); { char *filename = argv[1].text; FILE *file = fopen (filename, "w"); if (! file) { report_syserr (); return; } list_selected (tree, file); fclose (file); printf ("(phylo-okay)\n"); } } /* The unselect-all request. */ void unselect_all_request (int argc, struct lenstring *argv) { CHECK_TREE; CHECK_ARGC (1, "unselect-all"); tree_set (tree, SELECTED_M, 0); redisplay (); printf ("(phylo-okay)\n"); } /* The open-selected request. */ void open_selected (struct node *tree) { if (tree->marks & CHILDREN_SELECTED_M) tree->marks |= OPEN_M; else tree->marks &= ~OPEN_M; { int i; for (i = 0; i < tree->num_children; i++) open_selected (tree->children[i]); } } void open_selected_request (int argc, struct lenstring *argv) { CHECK_TREE; CHECK_ARGC (1, "open-selected"); update_children_selected (tree); open_selected (tree); redisplay (); printf ("(phylo-okay)\n"); } /* The open-subtree request. */ void open_subtree_request (int argc, struct lenstring *argv) { struct node *subtree; CHECK_TREE; CHECK_ARGC (2, "open-subtree"); CHECK_NODE_NAME (subtree, argv[1]); tree_set (subtree, OPEN_M, OPEN_M); redisplay (); printf ("(phylo-okay)\n"); } /* &&& request def */ /* In phylo.c.el there is Elisp code to help add requests. * * That code used to live here, in a '#if 0 ... #endif' block, but * apparently GCC has started paying more attention to the stuff * inside those blocks, because with GCC 3.3.1 I get this compilation * error: * * cd ~/src/ale/ * make * cd phylo; make all * make[1]: Entering directory `/home/kfogel/src/ale/phylo' * gcc -c -g -O2 phylo.c * phylo.c:1248:21: missing terminating " character * phylo.c:1256:2: missing terminating " character * make[1]: *** [phylo.o] Error 1 * make[1]: Leaving directory `/home/kfogel/src/ale/phylo' * make: *** [all] Error 2 * * Compilation exited abnormally with code 2 at Sun Nov 23 12:36:59 * * ... due to GCC apparently trying to eat Elisp code. Hrmph. We * told it not to try to eat that code, why did it insist? */