All files / src InfoResult.ts

98.36% Statements 60/61
93.75% Branches 15/16
100% Functions 4/4
98.36% Lines 60/61
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215 1x
 
 
1x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1x
 
8x
 
 
 
 
 
 
 
8x
 
 
 
8x
 
 
 
 
 
8x
 
 
 
 
 
 
8x
 
 
 
 
 
8x
 
 
8x
 
 
 
 
 
 
8x
2x
 
 
 
2x
 
2x
2x
2x
 
 
2x
 
2x
1x
1x
 
 
2x
1x
1x
 
 
 
 
2x
6x
6x
 
6x
6x
1x
1x
6x
1x
1x
6x
1x
1x
6x
1x
1x
6x
1x
1x
6x
1x
1x
6x
6x
 
1x
2x
 
 
 
 
 
 
8x
6x
 
3x
3x
3x
3x
 
 
6x
 
8x
8x
8x
8x
  import * as pdfjs from 'pdfjs-dist/legacy/build/pdf.mjs';
import type { Metadata } from 'pdfjs-dist/types/src/display/metadata.js';
 
// Lower-case XMP/XAP date property names that are looked up in the document
// metadata. The order is significant: InfoResult.getDateNode() maps each entry
// to its DateNode field by position in this list, so do not reorder or insert
// entries without updating that mapping.
const XMP_DATE_PROPERTIES = ['xmp:createdate', 'xmp:modifydate', 'xmp:metadatadate', 'xap:createdate', 'xap:modifydate', 'xap:metadatadate'];
 
export type { Metadata } from 'pdfjs-dist/types/src/display/metadata.js';
 
/**
 * Node representing a single item in the PDF outline (bookmarks).
 * This mirrors the structure returned by PDF.js' getOutline() API.
 */
export interface OutlineNode {
	/** The visible title of the bookmark / outline entry. */
	title: string;

	/** If true, the title should be rendered in bold. */
	bold: boolean;

	/** If true, the title should be rendered in italic. */
	italic: boolean;

	/**
	 * Color for the title as a clamped byte array (always present in this shape).
	 * NOTE(review): PDF.js documents the outline color as RGB (three bytes) —
	 * confirm before relying on an alpha channel.
	 */
	color: Uint8ClampedArray;

	/**
	 * Destination for the outline item. PDF.js may return a named
	 * destination (string) or an array representing an explicit destination.
	 * Can be null when no explicit destination is available.
	 */
	// biome-ignore lint/suspicious/noExplicitAny: <unsupported underline type>
	dest: string | Array<any> | null;

	/** If the outline entry points to an external URL, it will be here; otherwise null. */
	url: string | null;

	/** When PDF.js flags a URL as unsafe the raw value is available here. */
	unsafeUrl?: string;

	/** Whether the link should open in a new window/tab if rendered. */
	newWindow?: boolean;

	/** Number of child entries (if provided by the PDF). May be undefined. */
	count?: number;

	/**
	 * Child outline items. Type is kept loose to match PDF.js returns.
	 * NOTE(review): presumably each entry has the OutlineNode shape — confirm
	 * against the PDF.js typings before tightening.
	 */
	// biome-ignore lint/suspicious/noExplicitAny: <unsupported underline type>
	items: Array<any>;
}
 
/**
 * Consolidated date information gathered from different PDF sources.
 * The PDF 'Info' dictionary contains CreationDate / ModDate and
 * the XMP/XAP metadata can contain several timestamps as well. This
 * structure collects those values as produced by InfoResult.getDateNode().
 * Every field is optional; when populated it is typed as a Date or null.
 */
export type DateNode = {
	// Parsed from the Info dictionary's CreationDate entry (PDF date string).
	CreationDate?: Date | null;
	// Parsed from the Info dictionary's ModDate entry (PDF date string).
	ModDate?: Date | null;
	// Parsed from the 'xmp:createdate' metadata property.
	XmpCreateDate?: Date | null;
	// Parsed from the 'xmp:modifydate' metadata property.
	XmpModifyDate?: Date | null;
	// Parsed from the 'xmp:metadatadate' metadata property.
	XmpMetadataDate?: Date | null;
	// Parsed from the 'xap:createdate' metadata property.
	XapCreateDate?: Date | null;
	// Parsed from the 'xap:modifydate' metadata property.
	XapModifyDate?: Date | null;
	// Parsed from the 'xap:metadatadate' metadata property.
	XapMetadataDate?: Date | null;
};
 
/**
 * Per-page link extraction result.
 * - pageNumber: the physical page index (1-based) within the PDF document.
 * - pageLabel: optional printed page label shown by PDF viewers (e.g. "iii", "1", "A-1");
 *              this can differ from the physical page number and may be undefined
 *              when the document does not provide labels.
 * - links: array of text->URL mappings that were found/overlaid on the page.
 * - width/height: page dimensions in PDF units for the viewport used.
 */
export type PageLinkResult = {
	/** Physical page number (1-based index inside the PDF document). */
	pageNumber: number;

	/**
	 * Optional printed page label as displayed by PDF viewers. May be null/undefined
	 * if the document does not provide explicit labels for pages.
	 */
	pageLabel?: string | null;

	/**
	 * Hyperlinks that were overlaid or embedded on the page surface. Each entry
	 * contains the visible text (if any) and the resolved URL.
	 */
	links: Array<{ text: string; url: string }>;

	/** Page width for the page viewport that was used when extracting links. */
	width: number;
	/** Page height for the page viewport that was used when extracting links. */
	height: number;
};
 
/**
 * Aggregated information about a PDF document returned by getInfo().
 * The object contains high-level metadata, outline/bookmark structure,
 * per-page extracted hyperlinks and utility helpers for parsing dates.
 */
export class InfoResult {
	/**
	 * DateNode fields that receive the XMP/XAP timestamps. The list is
	 * index-aligned with the module-level XMP_DATE_PROPERTIES array: the
	 * metadata property at position i is stored in the field at position i.
	 */
	private static readonly XMP_DATE_FIELDS = [
		'XmpCreateDate',
		'XmpModifyDate',
		'XmpMetadataDate',
		'XapCreateDate',
		'XapModifyDate',
		'XapMetadataDate',
	] as const;

	// Total number of pages in the PDF document (count of physical pages).
	total: number;

	/**
	 * The PDF 'Info' dictionary. Typical fields include title, author, subject,
	 * Creator, Producer and Creation/Modification dates. The exact structure is
	 * determined by the PDF and as returned by PDF.js.
	 */
	// biome-ignore lint/suspicious/noExplicitAny: <unsupported underline type>
	info?: any;

	// Low-level document metadata object (XMP). Use this to access extended
	// properties that are not present in the Info dictionary.
	metadata?: Metadata;

	/**
	 * An array of document fingerprint strings provided by PDF.js. Useful
	 * for caching, de-duplication or identifying a document across runs.
	 */
	fingerprints?: Array<string | null>;

	/**
	 * Permission flags for the document as returned by PDF.js (or null).
	 * These flags indicate capabilities such as printing, copying and
	 * other restrictions imposed by the PDF security settings.
	 */
	permission?: number[] | null;

	/**
	 * Optional document outline (bookmarks). When present this is the
	 * hierarchical navigation structure which viewers use for quick access.
	 */
	outline?: Array<OutlineNode> | null;

	// Results with per-page hyperlink extraction. Empty array by default.
	pages: Array<PageLinkResult> = [];

	/**
	 * @param total Total number of physical pages in the document.
	 */
	constructor(total: number) {
		this.total = total;
	}

	/**
	 * Collects dates from different sources (Info dictionary and XMP/XAP metadata)
	 * and returns them as a DateNode. This helps callers compare and choose the
	 * most relevant timestamp (for example a creation date vs an XMP date).
	 *
	 * Field semantics are the same for every source:
	 * - the field is omitted when the source does not provide the value,
	 * - the field is a Date when the value was parsed successfully,
	 * - the field is null when a value is present but cannot be parsed.
	 *
	 * @returns A fresh DateNode; an empty object when no dates are available.
	 */
	public getDateNode(): DateNode {
		const result: DateNode = {};

		// The Info dictionary may contain CreationDate/ModDate in PDF date string
		// format; the result of PDFDateString.toDateObject is stored as-is.
		const creationDate = this.info?.CreationDate;
		if (creationDate) {
			result.CreationDate = pdfjs.PDFDateString.toDateObject(creationDate);
		}

		const modDate = this.info?.ModDate;
		if (modDate) {
			result.ModDate = pdfjs.PDFDateString.toDateObject(modDate);
		}

		// If no XMP metadata is present, return the Info-based dates only.
		const metadata = this.metadata;
		if (!metadata) {
			return result;
		}

		XMP_DATE_PROPERTIES.forEach((prop, index) => {
			const field = InfoResult.XMP_DATE_FIELDS[index];
			// Defensive: ignore properties that have no matching DateNode field.
			if (field === undefined) return;

			const raw = metadata.get(prop);
			// Property not present in the metadata: leave the field out entirely
			// instead of creating a key that holds `undefined`.
			if (raw === undefined || raw === null) return;

			// Present but unparsable values are reported as null, mirroring how
			// the Info dictionary dates above are reported.
			result[field] = this.parseISODateString(raw) ?? null;
		});

		return result;
	}

	/**
	 * Try to parse an ISO-8601 date string from XMP/XAP metadata.
	 *
	 * @param isoDateString Raw metadata value to parse.
	 * @returns The parsed Date, or undefined when the value is falsy or
	 *          Date.parse cannot interpret it.
	 */
	private parseISODateString(isoDateString: string): Date | undefined {
		if (!isoDateString) return undefined;

		const timestamp = Date.parse(isoDateString);
		return Number.isNaN(timestamp) ? undefined : new Date(timestamp);
	}
}