From 83022d18b7d10f528c15a62969227c16c1c1ddf9 Mon Sep 17 00:00:00 2001 From: Brian Ewins Date: Sun, 19 Apr 2009 11:07:53 +0100 Subject: [PATCH] Fix bug 3188, text selection across table cells Bug 3188. When selecting text, poppler goes across the whole page then down, rather than across each cell, down that cell, then across to the next cell. This leads to illegible paste results. Teach TextPage to visit the selection in reading order rather than block order. Attempt to start and end the selection in a block containing one of the points, or if there is no such block, a nearby block. When selecting text in reading order, it does not make sense to preserve formatting (since this places text out of order). The code that reformatted the selection text into columns has been removed. --- poppler/TextOutputDev.cc | 355 +++++++++++++++++++++++++++------------------- 1 files changed, 206 insertions(+), 149 deletions(-) diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index c0599ce..742a92a 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -3520,14 +3520,12 @@ void TextSelectionDumper::visitLine (TextLine *line, GooString *TextSelectionDumper::getText (void) { - GBool oneRot = gTrue; GooString *s; TextLineFrag *frag; - int i, col; - GBool multiLine; + int i; UnicodeMap *uMap; - char space[8], eol[16]; - int spaceLen, eolLen; + char eol[16]; + int eolLen; s = new GooString(); @@ -3536,51 +3534,12 @@ GooString *TextSelectionDumper::getText (void) if (uMap == NULL) return s; - spaceLen = uMap->mapUnicode(0x20, space, sizeof(space)); eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol)); - if (nFrags > 0) { - for (i = 0; i < nFrags; ++i) { - frags[i].computeCoords(oneRot); - } - page->assignColumns(frags, nFrags, oneRot); - - // if all lines in the region have the same rotation, use it; - // otherwise, use the page's primary rotation - if (oneRot) { - qsort(frags, nFrags, sizeof(TextLineFrag), - &TextLineFrag::cmpYXLineRot); - } else { - qsort(frags, nFrags, sizeof(TextLineFrag), - &TextLineFrag::cmpYXPrimaryRot); - } - - col = 0; - multiLine = gFalse; - for (i = 0; i < nFrags; ++i) { - frag = &frags[i]; - - // insert a return - if (frag->col < col || - (i > 0 && fabs(frag->base - frags[i-1].base) > - maxIntraLineDelta * frags[i-1].line->words->fontSize)) { - s->append(eol, eolLen); - col = 0; - multiLine = gTrue; - } - - // column alignment - for (; col < frag->col; ++col) { - s->append(space, spaceLen); - } - - // get the fragment text - col += page->dumpFragment(frag->line->text + frag->start, frag->len, uMap, s); - } - - if (multiLine) { - s->append(eol, eolLen); - } + for (i = 0; i < nFrags; ++i) { + frag = &frags[i]; + page->dumpFragment(frag->line->text + frag->start, frag->len, uMap, s); + s->append(eol, eolLen); } uMap->decRefCnt(); @@ -3858,52 +3817,97 @@ void TextLine::visitSelection(TextSelectionVisitor *visitor, void TextBlock::visitSelection(TextSelectionVisitor *visitor, PDFRectangle *selection, SelectionStyle style) { - TextLine *p, *begin, *end; PDFRectangle child_selection; - double start_x, start_y, stop_x, stop_y; - - begin = NULL; - end = NULL; - start_x = selection->x1; - start_y = selection->y1; - stop_x = selection->x2; - stop_y = selection->y2; - - for (p = lines; p != NULL; p = p->next) { - if (selection->x1 < p->xMax && selection->y1 < p->yMax && - selection->x2 < p->xMax && selection->y2 < p->yMax && begin == NULL) { - begin = p; - if (selection->x1 < selection->x2) { - start_x = selection->x1; - start_y = selection->y1; - stop_x = selection->x2; - stop_y = selection->y2; - } else { - start_x = selection->x2; - start_y = selection->y2; - stop_x = selection->x1; - stop_y = selection->y1; + double x[2], y[2]; + TextLine *p, *first_line[2], *mid_line[2], *last_line[2], *begin, *end; + int i, count = 0, first_count[2], last_count[2], start, stop; + + x[0] = selection->x1; + y[0] = selection->y1; + x[1] = selection->x2; + y[1] = selection->y2; + + for (i = 0; i < 2; i++) { + first_line[i] = NULL; + first_count[i] = 0; + last_line[i] = NULL; + last_count[i] = 0; + mid_line[i] = NULL; + } + + // for each of the two points in the selection, track 3 lines: + // the 'mid' line contains the point; the 'first' line - + // which is the mid line if set, or the first line that + // ends below and right of the point; and the 'last' line, + // which is the first line if set, or the last line that + // begins above and left of the point. By construction these + // will always be read in the order first, mid, last. + for (p = this->lines; p && + (!mid_line[0] || !mid_line[1]); p = p->next) { + count++; + for (i = 0; i < 2; i++) { + if (!mid_line[i] && + x[i] >= p->xMin && + y[i] >= p->yMin && + x[i] < p->xMax && + y[i] < p->yMax) { + mid_line[i] = p; + } + if (p == mid_line[i] || + (!first_line[i] && + x[i] < p->xMax && + y[i] < p->yMax)) { + first_line[i] = p; + first_count[i] = count; + } + // force the last line to always be last + if (p == first_line[i] || + (!mid_line[i] && + x[i] >= p->xMin && + y[i] >= p->yMin)) { + last_line[i] = p; + last_count[i] = count; } - } else if (selection->x1 < p->xMax && selection->y1 < p->yMax && begin == NULL) { - begin = p; - start_x = selection->x1; - start_y = selection->y1; - stop_x = selection->x2; - stop_y = selection->y2; - } else if (selection->x2 < p->xMax && selection->y2 < p->yMax && begin == NULL) { - begin = p; - start_x = selection->x2; - start_y = selection->y2; - stop_x = selection->x1; - stop_y = selection->y1; } + } - if (((selection->x1 > p->xMin && selection->y1 > p->yMin) || - (selection->x2 > p->xMin && selection->y2 > p->yMin)) - && (begin != NULL)) - end = p->next; + // In the margins, all of the tracked lines + // may be null. On the basis that a user making + // a selection was trying to select *something*, + // guess an appropriate line. + // This leaves the bottom left as the only place + // where first or last are null. + for (i = 0; i < 2; i++) { + if (!first_line[i] && !last_line[i] && + y[i] <= this->lines->yMin) { + first_line[i] = this->lines; + first_count[i] = 1; + } + if (first_line[i] && !last_line[i]) { + last_line[i] = first_line[i]; + last_count[i] = first_count[i]; + } else if (!first_line[i] && last_line[i]) { + first_line[i] = last_line[i]; + first_count[i] = last_count[i]; + } } + // Now decide which point was first. + if (first_line[0] && + (!first_line[1] || + first_count[0] <= first_count[1])) { + start = 0; + stop = 1; + } else { + start = 1; + stop = 0; + } + + begin = first_line[start]; + end = last_line[stop]; + if (end) + end = end->next; + /* Skip empty selection. */ if (end == begin) return; @@ -3912,15 +3916,15 @@ void TextBlock::visitSelection(TextSelectionVisitor *visitor, for (p = begin; p != end; p = p->next) { if (p == begin && style != selectionStyleLine) { - child_selection.x1 = start_x; - child_selection.y1 = start_y; + child_selection.x1 = x[start]; + child_selection.y1 = y[start]; } else { child_selection.x1 = 0; child_selection.y1 = 0; } if (p->next == end && style != selectionStyleLine) { - child_selection.x2 = stop_x; - child_selection.y2 = stop_y; + child_selection.x2 = x[stop]; + child_selection.y2 = y[stop]; } else { child_selection.x2 = page->pageWidth; child_selection.y2 = page->pageHeight; @@ -3934,73 +3938,126 @@ void TextPage::visitSelection(TextSelectionVisitor *visitor, PDFRectangle *selection, SelectionStyle style) { - int i, begin, end; PDFRectangle child_selection; - double start_x, start_y, stop_x, stop_y; - TextBlock *b; - - begin = nBlocks; - end = 0; - start_x = selection->x1; - start_y = selection->y1; - stop_x = selection->x2; - stop_y = selection->y2; - - for (i = 0; i < nBlocks; i++) { - b = blocks[i]; - - if (selection->x1 < b->xMax && selection->y1 < b->yMax && - selection->x2 < b->xMax && selection->y2 < b->yMax && i < begin) { - begin = i; - if (selection->y1 < selection->y2) { - start_x = selection->x1; - start_y = selection->y1; - stop_x = selection->x2; - stop_y = selection->y2; - } else { - start_x = selection->x2; - start_y = selection->y2; - stop_x = selection->x1; - stop_y = selection->y1; + double x[2], y[2]; + TextFlow *flow, *first_flow[2], *last_flow[6]; + TextBlock *blk, *first_block[2], *mid_block[2], *last_block[2]; + int i, count = 0, first_count[2], last_count[2], start, stop; + + x[0] = selection->x1; + y[0] = selection->y1; + x[1] = selection->x2; + y[1] = selection->y2; + + for (i = 0; i < 2; i++) { + first_block[i] = NULL; + first_flow[i] = NULL; + first_count[i] = 0; + last_block[i] = NULL; + last_flow[i] = NULL; + last_count[i] = 0; + mid_block[i] = NULL; + } + + // for each of the two points in the selection, track 3 blocks: + // the 'mid' block contains the point; the 'first' block - + // which is the mid block if set, or the first block that + // ends below and right of the point; and the 'last' block, + // which is the first block if set, or the last block that + // begins above and left of the point. By construction these + // will always be read in the order first, mid, last. + for (flow = flows; flow && + (!mid_block[0] || !mid_block[1]); flow = flow->next) { + for (blk = flow->blocks; blk && + (!mid_block[0] || !mid_block[1]); blk = blk->next) { + count++; + for (i = 0; i < 2; i++) { + if (!mid_block[i] && + x[i] >= blk->xMin && + y[i] >= blk->yMin && + x[i] < blk->xMax && + y[i] < blk->yMax) { + mid_block[i] = blk; + } + if (blk == mid_block[i] || + (!first_block[i] && + x[i] < blk->xMax && + y[i] < blk->yMax)) { + first_block[i] = blk; + first_flow[i] = flow; + first_count[i] = count; + } + // force the last block to always be last + if (blk == first_block[i] || + (!mid_block[i] && + x[i] >= blk->xMin && + y[i] >= blk->yMin)) { + last_block[i] = blk; + last_flow[i] = flow; + last_count[i] = count; + } } - } else if (selection->x1 < b->xMax && selection->y1 < b->yMax && i < begin) { - begin = i; - start_x = selection->x1; - start_y = selection->y1; - stop_x = selection->x2; - stop_y = selection->y2; - } else if (selection->x2 < b->xMax && selection->y2 < b->yMax && i < begin) { - begin = i; - start_x = selection->x2; - start_y = selection->y2; - stop_x = selection->x1; - stop_y = selection->y1; - } - - if ((selection->x1 > b->xMin && selection->y1 > b->yMin) || - (selection->x2 > b->xMin && selection->y2 > b->yMin)) - end = i + 1; + } } - for (i = begin; i < end; i++) { - if (blocks[i]->xMin < start_x && start_x < blocks[i]->xMax && - blocks[i]->yMin < start_y && start_y < blocks[i]->yMax) { - child_selection.x1 = start_x; - child_selection.y1 = start_y; + // In the page margins, all of the tracked blocks + // may be null. On the basis that a user making + // a selection was trying to select *something*, + // guess an appropriate block. + // This leaves the bottom left as the only place + // where first or last are null. + for (i = 0; i < 2; i++) { + if (!first_block[i] && !last_block[i] && + y[i] <= flows->blocks->yMin) { + first_block[i] = flows->blocks; + first_flow[i] = flows; + first_count[i] = 1; + } + if (first_block[i] && !last_block[i]) { + last_block[i] = first_block[i]; + last_flow[i] = first_flow[i]; + last_count[i] = first_count[i]; + } else if (!first_block[i] && last_block[i]) { + first_block[i] = last_block[i]; + first_flow[i] = last_flow[i]; + first_count[i] = last_count[i]; + } + } + + // Now decide which point was first. + if (first_block[0] && + (!first_block[1] || + first_count[0] <= first_count[1])) { + start = 0; + stop = 1; + } else { + start = 1; + stop = 0; + } + + for (flow = first_flow[start]; flow; flow = flow->next) { + if (flow == first_flow[start]) { + blk = first_block[start]; } else { + blk = flow->blocks; + } + for (; blk; blk = blk->next) { child_selection.x1 = 0; child_selection.y1 = 0; - } - if (blocks[i]->xMin < stop_x && stop_x < blocks[i]->xMax && - blocks[i]->yMin < stop_y && stop_y < blocks[i]->yMax) { - child_selection.x2 = stop_x; - child_selection.y2 = stop_y; - } else { child_selection.x2 = pageWidth; child_selection.y2 = pageHeight; + if (blk == first_block[start]) { + child_selection.x1 = fmax(blk->xMin, fmin(blk->xMax, x[start])); + child_selection.y1 = fmax(blk->yMin, fmin(blk->yMax, y[start])); + } + if (blk == last_block[stop]) { + child_selection.x2 = fmax(blk->xMin, fmin(blk->xMax, x[stop])); + child_selection.y2 = fmax(blk->yMin, fmin(blk->yMax, y[stop])); + blk->visitSelection(visitor, &child_selection, style); + return; + } + blk->visitSelection(visitor, &child_selection, style); } - - blocks[i]->visitSelection(visitor, &child_selection, style); } } -- 1.6.2.2