ACE ubertagging/C question: confused about how to debug

I am trying to work with ACE source code (to do some experiments with the supertaggers I trained for the ERG), and I am not very experienced with C. As a result, I am finding myself quite confused, and I was hoping someone (@sweaglesw ?) might help.

I am very confused about how the ubertagging option gets triggered and used by the parser. Below are my two typical test cases: the first without the ubertagging option, using a grammar image that did not have access to the ubertagging model paths, and the second with the ubertagging option and the appropriately compiled grammar:

./ace -g ~/delphin/erg/trunk/ace/erg.dat sentences.txt
./ace -g ~/delphin/erg/trunk/ace/english-ut.dat sentences.txt --ubertagging=0.0001

My goal is to be able to step through the program execution in both cases, so that I can understand the differences well and then after that add my own code for the new supertagger option.

Now, I am confused about what is going on. Below is the code that is responsible for using the ubertagger:

// Excerpt from the middle of the body of parse_with_token_chart(): runs the
// ubertagger over the lexical chart and prints the resulting chart.
#include	"ubertag.h"
extern struct ubertagger	*the_ubertagger;
extern int	enable_ubertagging;
char *st_file = "supertags.txt";
// NOTE(review): enable_ubertagging is declared as int above, but "%f" expects a double; "%d" would match the type.
printf("Ubertagging enabled: %f\n",enable_ubertagging);
	if(g_profiling)start_and_alloc_profiler(&ubertagging_profiler, "übertagging", parse_profiler, lexical_filtering_profiler);
	// This if has no braces, so only the single ubertag_lattice() statement is conditional.
	if(the_ubertagger && enable_ubertagging)
		ubertag_lattice(the_ubertagger, lexical_chart, log(ubertagging_threshold));
		// Despite their indentation, the next two statements are NOT part of the if above;
		// they execute whether or not ubertagging is enabled.
		printf("Lexical chart after ubertagging:\n");
		print_lexical_chart(lexical_chart);

The above code is part of a function called parse_with_token_chart. I paste the whole function below. Note the unusual (?) indentation of the code which includes the ubertagger header file. It is in the middle of the function but it is indented as if it were external to the function scope (maybe because the extern keyword is used? I am not familiar with this…)

// Parse one input, given its token lattice.
//
// The stages run in this order: token mapping, lexical lookup, lexical
// parsing / lattice reduction, lexical filtering, (optional) ubertagging,
// chart setup, agenda-driven chart parsing, and finally result output
// (forest, plain results, [incr tsdb()] report, or LUI trees depending on the
// global mode flags).
//
// token_chart: the token lattice for the current input; it is modified in
//              place by token mapping and sorting.
// start:       clock() value taken by the caller; used to compute the elapsed
//              time reported in itsdb mode.
//
// Returns the number of parses found on success (1 in forest mode),
// -1 if the lexical chart could not be built or lexical parsing ran out of
// memory, and -2 if the input was given up on (too many words, a gap after
// lattice reduction, or nothing to parse).
int parse_with_token_chart(struct lattice	*token_chart, clock_t	start)
{
	int		cforest;
	int		num_parses = 0;
	float	t_setup, t_forest, t_unpack;
	struct edge	*edge;

	if(do_itsdb)itsdb_dump_tokens(":p-input", ":ninputs", token_chart);
	apply_token_mapping(token_chart);
	sort_lattice(token_chart);
	if(!yy_mode && do_improve_characterization)
		improve_token_lattice_characterization(token_chart);

	if(token_chart->nvertices>give_up_threshold)
	{
		fprintf(stderr, "NOTE: giving up, too many words (%d+)\n", give_up_threshold);
		itsdb_error("too many words");
		if(do_itsdb)itsdb_report_parse(token_chart, NULL, -2, 0);
		stop_timer(chart_setup_timer, 1);
		return -2;
	}

	if(trace)print_token_chart(token_chart);

	if(trace)printf("finished token mapping\n");

	if(g_profiling)start_and_alloc_profiler(&lexical_lookup_profiler, "lexical lookup", parse_profiler, token_mapping_profiler);

	// do lexical lookup
	struct lattice	*lexical_chart = lexical_lookup_into_chart(token_chart);
	if(!lexical_chart)
	{
		fprintf(stderr, "NOTE: failed to build lexical chart\n");
		stop_timer(chart_setup_timer, 1);
		return -1;
	}
	if(trace)printf("finished lexical lookup\n");

	if(g_profiling)start_and_alloc_profiler(&lexical_parsing_profiler, "lexical parsing", parse_profiler, lexical_lookup_profiler);

	extern int chart_size;
	chart_size = 0;	// set to a predictable value rather than leaving it whatever the last sentence had; used in deciding whether an edge is spanning or not, as early as lexical parsing, for deciding whether the rule_root_utc[] is applicable.

	extern int reduce_chart_before_lexical_parsing;

	if(!reduce_chart_before_lexical_parsing)
	{
		// do lexical parsing
		int rv = lexical_parse_lattice(lexical_chart);
		if(rv == -1)
		{
			// NOTE(review): unlike the other early returns, this path does not
			// call stop_timer(chart_setup_timer, 1) -- confirm whether that is intended.
			itsdb_error("ran out of RAM in lexical parsing");
			return -1;	// ran out of memory in lexical parsing!
		}
	}

	if(reduce_lexical_lattice(lexical_chart, token_chart))
	{
		fprintf(stderr, "NOTE: post reduction gap\n");
		stop_timer(chart_setup_timer, 1);
		if(do_itsdb)itsdb_report_parse(token_chart, lexical_chart, -2, 0);
		return -2;
	}

	// NOTE(review): the return value is ignored here, whereas the call above
	// treats -1 as out-of-memory -- confirm whether this path needs the same check.
	if(reduce_chart_before_lexical_parsing)
		lexical_parse_lattice(lexical_chart);

	if(trace)print_lexical_chart(lexical_chart);
	if(trace)printf("finished lexical parsing\n");

	if(g_profiling)start_and_alloc_profiler(&lexical_filtering_profiler, "lexical filtering", parse_profiler, lexical_parsing_profiler);

	// do lexical filtering
	apply_lexical_filtering(lexical_chart);
	if(trace)printf("finished lexical filtering\n");

	// The ubertagger declarations are pulled in at block scope, right where they are used.
#include	"ubertag.h"
	extern struct ubertagger	*the_ubertagger;
	extern int	enable_ubertagging;
	char *st_file = "supertags.txt";
	if(g_profiling)start_and_alloc_profiler(&ubertagging_profiler, "übertagging", parse_profiler, lexical_filtering_profiler);
	if(the_ubertagger && enable_ubertagging)
	{
		// Braces are required: without them only the first statement would be
		// conditional, and the chart dump below would run for every sentence
		// even when ubertagging is disabled.
		ubertag_lattice(the_ubertagger, lexical_chart, log(ubertagging_threshold));
		printf("Lexical chart after ubertagging:\n");
		print_lexical_chart(lexical_chart);
	}

	if(g_profiling)start_and_alloc_profiler(&chart_parsing_profiler, "chart parsing", parse_profiler, ubertagging_profiler);

	// XXX far-fetched experiment...
	//first_pass_parse(lexical_chart);
	//return 1;

	// lexemes have no packing restrictor applied to them thus far.
	// make sure all copy()'s from here on out (until unpacking time) are restricting.
	if(!inhibit_pack)enable_packing(1);
	//print_lexical_chart(lexical_chart);
	struct supertagger	*the_supertagger = load_supertagger(st_file); //OZ: This is new code; does nothing at the moment.
	// setup the main parse chart
	int nwords = prepare_parse_chart(token_chart, lexical_chart);
	if(nwords < 0)
	{
		fprintf(stderr, "NOTE: negative lexical chart length\n");
		itsdb_error("negative lexical chart length");
		if(do_itsdb)itsdb_report_parse(token_chart, lexical_chart, -2, 0);
		stop_timer(chart_setup_timer, 1);
		enable_packing(0);
		return -2;
	}
	assert(nwords >= 0);
	if(!nwords)
	{
		fprintf(stderr, "NOTE: nothing to parse\n");
		itsdb_error("nothing to parse");
		if(do_itsdb)itsdb_report_parse(token_chart, lexical_chart, -2, 0);
		stop_timer(chart_setup_timer, 1);
		enable_packing(0);
		return -2;
	}

	// XXX experimental chart-pruning-like idea
	predict_rule_uses(nwords, lexical_chart);

	cancel_task = 0;
	did_timeout = 0;
	signal(SIGALRM, alarm_handler);

	if(timeout_seconds > 0)
		alarm(timeout_seconds);

	stop_timer(chart_setup_timer, 1);
	static int chart_parsing_timer = -1;
	if(chart_parsing_timer == -1)chart_parsing_timer = new_timer("chart parsing");
	start_timer(chart_parsing_timer);

	t_setup = timer();

	// do the actual parsing
	int	half = nwords/2;
	// Initialised so that t_forest below never reads an indeterminate value
	// when the loop exits before any edge reaches the half-span mark.
	double t_half = 0;
	while( !cancel_task && (edge = next_agenda(0)) )
	{
		if(edge->to-edge->from >= half)
		{
			// nwords+2 exceeds any span this test is meant to see, so the
			// half-time sample is taken at most once.
			half = nwords+2;
			t_half = timer();
			//halftime_analysis();
		}
		if(parse_process_edge(edge))break;
	}

	compact_generalization_edges();

	enable_packing(0);

	stop_timer(chart_parsing_timer, 1);

	if(g_profiling)start_and_alloc_profiler(&unpacking_profiler, "unpacking", parse_profiler, chart_parsing_profiler);

	t_forest = t_half + timer();
	cforest = clock() - start;

	if(do_forest)
	{
		char	*sentence = current_sentence;
		fflush(stdout);
		if(sentence)fprintf(stderr, "SENT: %s\n", sentence);
		int found_root = output_forest(&cells[chart_size-1], token_chart);
		fflush(stdout);
		fprintf(stderr, "NOTE: %d readings [forest], ", found_root);
		print_slab_stats();
		fprintf(stderr, "\n");
		num_parses = 1;
	}
	else if(!do_itsdb && !lui_mode)
	{
		didsent = 0;
		if(chart_size>0)
			num_parses = iterate_cell_root_hypotheses(&cells[chart_size-1], parse_show_result, best_only);
		else num_parses = 0;

		fprintf(stderr, "NOTE: %d readings, added %d / %d edges to chart (%d fully instantiated, %d actives used, %d passives used)\t",
			num_parses, total_edges, real_edges, passive_edges, used_edges, pused_edges);
		print_slab_stats();

		//rule_profiler();	// XXX comment me out normally
	}
	else if(do_itsdb)
	{
		// elapsed clock ticks converted to milliseconds; widened first to avoid overflow
		num_parses = itsdb_report_parse(token_chart, lexical_chart, 0, (int)((long long)cforest*1000 / CLOCKS_PER_SEC));
	}
	else if(lui_mode)
	{
		num_parses = output_lui_trees(cells+chart_size-1, current_sentence, best_only, 0);
		if(!num_parses){}//output_lui_chart(words);
		last_token_chart = token_chart;
	}
	alarm(0);	// cancel any pending timeout alarm

//	extern int	output_edge_vectors;
//	if(output_edge_vectors)
//		do_output_edge_vectors();

	t_unpack = timer();

	used_total += used_edges;
	pused_total += pused_edges;

	// disabled timing dump, kept for ad-hoc experiments
	if(0)
	{
		static FILE	*f = 0;
		//extern long long ndecomposed;
		//static long long ond = 0;
		if(!f)f = fopen("/tmp/times", "w");
		fprintf(f, "%d	%d	%f	%f	%f	%s\n", nwords, num_parses, t_setup, t_forest, t_unpack, current_sentence);
		//ond = ndecomposed;
		fflush(f);
	}

	return num_parses;
}

What is confusing to me is, no matter whether I use the first or the second command, the program still executes at least some(??) of the statements which are in the if(the_ubertagger && enable_ubertagging). In the below screenshot you can observe the execution being stopped at the breakpoint on line 532 (after somehow not stopping at line 531), and you can observe that the value of the variable enable_ubertagging is definitely 0 (false). The the_ubertagger pointer is 0x0 also.

Can anyone help me understand what is going on? How is this possible? I am using VS Code, but I observe the same output if I run the program via the terminal. Is there something special about these extern variables that makes this possible? (Or is this an indication that I am not looking at what I think I am looking at, or something like that?)

There are no {} braces! So only the first statement is in the scope of the if-block. I was blind to that. I guess I was confused by the indentation of the extern statements and started treating indentation as meaningful at that point (lines 532-533 were added by me of course).

1 Like

You probably worked this out already, but without the braces, only the next statement is controlled by the `if`; the statements after it always execute. The indentation is not meaningful in C :slight_smile: