Skip to content

Client API

GDELTClient

Main client for accessing all GDELT data sources.

This is the primary entry point for the py-gdelt library. It manages the lifecycle of all dependencies (HTTP client, file source, BigQuery source) and provides convenient namespace access to all endpoints.

The client can be used as either an async or sync context manager, and supports dependency injection for testing.

Parameters:

Name Type Description Default
settings GDELTSettings | None

Optional GDELTSettings instance. If None, creates default settings.

None
config_path Path | None

Optional path to TOML configuration file. Only used if settings is None. If both are provided, settings takes precedence.

None
http_client AsyncClient | None

Optional shared HTTP client for testing. If None, client creates and owns its own HTTP client. If provided, the lifecycle is managed externally and the client will not be closed on exit.

None
Example

>>> async with GDELTClient() as client:
...     events = await client.events.query(filter_obj)
...     articles = await client.doc.search("climate")
...     theme = client.lookups.themes.get_category("ENV_CLIMATECHANGE")

With config file

async with GDELTClient(config_path=Path("gdelt.toml")) as client: ... pass

With custom settings

>>> settings = GDELTSettings(timeout=60, max_retries=5)
>>> async with GDELTClient(settings=settings) as client:
...     pass

With dependency injection for testing

>>> async with httpx.AsyncClient() as http_client:
...     async with GDELTClient(http_client=http_client) as client:
...         pass

Source code in src/py_gdelt/client.py
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
class GDELTClient:
    """Main client for accessing all GDELT data sources.

    This is the primary entry point for the py-gdelt library. It manages the
    lifecycle of all dependencies (HTTP client, file source, BigQuery source)
    and provides convenient namespace access to all endpoints.

    The client can be used as either an async or sync context manager, and
    supports dependency injection for testing.

    Args:
        settings: Optional GDELTSettings instance. If None, creates default settings.
        config_path: Optional path to TOML configuration file. Only used if
            settings is None. If both are provided, settings takes precedence.
        http_client: Optional shared HTTP client for testing. If None, client
            creates and owns its own HTTP client. If provided, the lifecycle
            is managed externally and the client will not be closed on exit.

    Example:
        >>> async with GDELTClient() as client:
        ...     events = await client.events.query(filter_obj)
        ...     articles = await client.doc.search("climate")
        ...     theme = client.lookups.themes.get_category("ENV_CLIMATECHANGE")

        >>> # With config file
        >>> async with GDELTClient(config_path=Path("gdelt.toml")) as client:
        ...     pass

        >>> # With custom settings
        >>> settings = GDELTSettings(timeout=60, max_retries=5)
        >>> async with GDELTClient(settings=settings) as client:
        ...     pass

        >>> # With dependency injection for testing
        >>> async with httpx.AsyncClient() as http_client:
        ...     async with GDELTClient(http_client=http_client) as client:
        ...         pass
    """

    def __init__(
        self,
        settings: GDELTSettings | None = None,
        config_path: Path | None = None,
        http_client: httpx.AsyncClient | None = None,
    ) -> None:
        # Initialize settings
        if settings is not None:
            self.settings = settings
        elif config_path is not None:
            self.settings = GDELTSettings(config_path=config_path)
        else:
            self.settings = GDELTSettings()

        # HTTP client management
        self._http_client = http_client
        self._owns_http_client = http_client is None

        # Source instances (created lazily)
        self._file_source: FileSource | None = None
        self._bigquery_source: BigQuerySource | None = None
        self._owns_sources = True

        # Lifecycle state
        self._initialized = False

    async def _initialize(self) -> None:
        """Initialize sources and HTTP client.

        Called automatically on first use via context manager.
        Creates HTTP client (if not injected) and initializes file source.
        BigQuery source is created only if credentials are configured.
        """
        if self._initialized:
            return

        # Create HTTP client if not injected
        if self._owns_http_client:
            self._http_client = httpx.AsyncClient(
                timeout=httpx.Timeout(
                    connect=10.0,
                    read=self.settings.timeout,
                    write=10.0,
                    pool=5.0,
                ),
                follow_redirects=True,
            )

        # Initialize file source
        self._file_source = FileSource(
            settings=self.settings,
            client=self._http_client,
        )
        await self._file_source.__aenter__()

        # Initialize BigQuery source if credentials are configured
        if self.settings.bigquery_project and self.settings.bigquery_credentials:
            try:
                self._bigquery_source = BigQuerySource(settings=self.settings)
                logger.debug(
                    "Initialized BigQuerySource with project %s",
                    self.settings.bigquery_project,
                )
            except ImportError as e:
                # google-cloud-bigquery package not installed
                logger.warning(
                    "BigQuery package not installed: %s. "
                    "Install with: pip install py-gdelt[bigquery]",
                    e,
                )
                self._bigquery_source = None
            except (OSError, FileNotFoundError) as e:
                # Credentials file not found or not readable
                logger.warning(
                    "BigQuery credentials file error: %s. BigQuery fallback will be unavailable.",
                    e,
                )
                self._bigquery_source = None
            except Exception as e:  # noqa: BLE001
                # Catch all Google SDK errors without importing optional dependency
                # This is an error boundary - BigQuery is optional, errors should not crash
                logger.warning(
                    "Failed to initialize BigQuerySource (%s): %s. "
                    "BigQuery fallback will be unavailable.",
                    type(e).__name__,
                    e,
                )
                self._bigquery_source = None

        self._initialized = True
        logger.debug("GDELTClient initialized successfully")

    async def _cleanup(self) -> None:
        """Clean up resources.

        Closes file source, BigQuery source (if created), and HTTP client (if owned).
        """
        if not self._initialized:
            return

        # Close file source
        if self._file_source is not None:
            await self._file_source.__aexit__(None, None, None)
            self._file_source = None

        # BigQuery source doesn't need explicit cleanup (no persistent connections)
        self._bigquery_source = None

        # Close HTTP client if we own it
        if self._owns_http_client and self._http_client is not None:
            await self._http_client.aclose()
            self._http_client = None

        self._initialized = False
        logger.debug("GDELTClient cleaned up successfully")

    async def __aenter__(self) -> GDELTClient:
        """Async context manager entry.

        Returns:
            Self for use in async with statement.

        Example:
            >>> async with GDELTClient() as client:
            ...     events = await client.events.query(filter_obj)
        """
        await self._initialize()
        return self

    async def __aexit__(self, *args: Any) -> None:
        """Async context manager exit.

        Cleans up all owned resources.

        Args:
            *args: Exception info (unused, but required by protocol).
        """
        await self._cleanup()

    def __enter__(self) -> GDELTClient:
        """Sync context manager entry.

        This provides synchronous (blocking) access to the client for use in
        non-async code. It uses asyncio.run() internally to manage the event loop.

        Important Limitations:
            - MUST be called from outside any existing async context/event loop.
              Calling from within an async function will raise RuntimeError.
            - Creates a new event loop for each context manager entry.
            - Use the async context manager (async with) when possible for
              better performance and compatibility.

        Returns:
            Self for use in with statement.

        Raises:
            RuntimeError: If called from within an already running event loop.

        Example:
            >>> # Correct: Used from synchronous code
            >>> with GDELTClient() as client:
            ...     events = client.events.query_sync(filter_obj)
            ...
            >>> # Wrong: Don't use from async code - use 'async with' instead
            >>> async def bad_example():
            ...     with GDELTClient() as client:  # RuntimeError!
            ...         pass
        """
        asyncio.run(self._initialize())
        return self

    def __exit__(self, *args: Any) -> None:
        """Sync context manager exit.

        Cleans up all owned resources. Uses asyncio.run() internally.

        Args:
            *args: Exception info (unused, but required by protocol).

        Raises:
            RuntimeError: If called from within an already running event loop.
        """
        asyncio.run(self._cleanup())

    # Endpoint namespaces (lazy initialization via cached_property)

    @cached_property
    def events(self) -> EventsEndpoint:
        """Access the Events endpoint.

        Provides methods for querying GDELT Events data from files or BigQuery.

        Returns:
            EventsEndpoint instance.

        Raises:
            RuntimeError: If client not initialized (use context manager).

        Example:
            >>> async with GDELTClient() as client:
            ...     filter_obj = EventFilter(date_range=DateRange(start=date(2024, 1, 1)))
            ...     events = await client.events.query(filter_obj)
        """
        if self._file_source is None:
            msg = "GDELTClient not initialized. Use 'async with GDELTClient() as client:'"
            raise RuntimeError(msg)
        return EventsEndpoint(
            file_source=self._file_source,
            bigquery_source=self._bigquery_source,
            fallback_enabled=self.settings.fallback_to_bigquery,
        )

    @cached_property
    def mentions(self) -> MentionsEndpoint:
        """Access the Mentions endpoint.

        Provides methods for querying GDELT Mentions data from files or BigQuery.

        Returns:
            MentionsEndpoint instance.

        Raises:
            RuntimeError: If client not initialized (use context manager).

        Example:
            >>> async with GDELTClient() as client:
            ...     filter_obj = EventFilter(date_range=DateRange(start=date(2024, 1, 1)))
            ...     mentions = await client.mentions.query("123456789", filter_obj)
        """
        if self._file_source is None:
            msg = "GDELTClient not initialized. Use 'async with GDELTClient() as client:'"
            raise RuntimeError(msg)
        return MentionsEndpoint(
            file_source=self._file_source,
            bigquery_source=self._bigquery_source,
            fallback_enabled=self.settings.fallback_to_bigquery,
        )

    @cached_property
    def gkg(self) -> GKGEndpoint:
        """Access the GKG (Global Knowledge Graph) endpoint.

        Provides methods for querying GDELT GKG data from files or BigQuery.

        Returns:
            GKGEndpoint instance.

        Raises:
            RuntimeError: If client not initialized (use context manager).

        Example:
            >>> async with GDELTClient() as client:
            ...     filter_obj = GKGFilter(
            ...         date_range=DateRange(start=date(2024, 1, 1)),
            ...         themes=["ENV_CLIMATECHANGE"]
            ...     )
            ...     records = await client.gkg.query(filter_obj)
        """
        if self._file_source is None:
            msg = "GDELTClient not initialized. Use 'async with GDELTClient() as client:'"
            raise RuntimeError(msg)
        return GKGEndpoint(
            file_source=self._file_source,
            bigquery_source=self._bigquery_source,
            fallback_enabled=self.settings.fallback_to_bigquery,
        )

    @cached_property
    def ngrams(self) -> NGramsEndpoint:
        """Access the NGrams endpoint.

        Provides methods for querying GDELT NGrams data (files only).

        Returns:
            NGramsEndpoint instance.

        Raises:
            RuntimeError: If client not initialized (use context manager).

        Example:
            >>> async with GDELTClient() as client:
            ...     filter_obj = NGramsFilter(
            ...         date_range=DateRange(start=date(2024, 1, 1)),
            ...         language="en"
            ...     )
            ...     records = await client.ngrams.query(filter_obj)
        """
        if self._file_source is None:
            msg = "GDELTClient not initialized. Use 'async with GDELTClient() as client:'"
            raise RuntimeError(msg)
        return NGramsEndpoint(
            settings=self.settings,
            file_source=self._file_source,
        )

    @cached_property
    def doc(self) -> DocEndpoint:
        """Access the DOC 2.0 API endpoint.

        Provides methods for searching GDELT articles via the DOC API.

        Returns:
            DocEndpoint instance.

        Raises:
            RuntimeError: If client not initialized (use context manager).

        Example:
            >>> async with GDELTClient() as client:
            ...     articles = await client.doc.search("climate change", max_results=100)
        """
        if self._http_client is None:
            msg = "GDELTClient not initialized. Use 'async with GDELTClient() as client:'"
            raise RuntimeError(msg)
        return DocEndpoint(
            settings=self.settings,
            client=self._http_client,
        )

    @cached_property
    def geo(self) -> GeoEndpoint:
        """Access the GEO 2.0 API endpoint.

        Provides methods for querying geographic locations from news articles.

        Returns:
            GeoEndpoint instance.

        Raises:
            RuntimeError: If client not initialized (use context manager).

        Example:
            >>> async with GDELTClient() as client:
            ...     result = await client.geo.search("earthquake", max_points=100)
        """
        if self._http_client is None:
            msg = "GDELTClient not initialized. Use 'async with GDELTClient() as client:'"
            raise RuntimeError(msg)
        return GeoEndpoint(
            settings=self.settings,
            client=self._http_client,
        )

    @cached_property
    def context(self) -> ContextEndpoint:
        """Access the Context 2.0 API endpoint.

        Provides methods for contextual analysis of search terms.

        Returns:
            ContextEndpoint instance.

        Raises:
            RuntimeError: If client not initialized (use context manager).

        Example:
            >>> async with GDELTClient() as client:
            ...     result = await client.context.analyze("climate change")
        """
        if self._http_client is None:
            msg = "GDELTClient not initialized. Use 'async with GDELTClient() as client:'"
            raise RuntimeError(msg)
        return ContextEndpoint(
            settings=self.settings,
            client=self._http_client,
        )

    @cached_property
    def tv(self) -> TVEndpoint:
        """Access the TV API endpoint.

        Provides methods for querying television news transcripts.

        Returns:
            TVEndpoint instance.

        Raises:
            RuntimeError: If client not initialized (use context manager).

        Example:
            >>> async with GDELTClient() as client:
            ...     clips = await client.tv.search("climate change", station="CNN")
        """
        if self._http_client is None:
            msg = "GDELTClient not initialized. Use 'async with GDELTClient() as client:'"
            raise RuntimeError(msg)
        return TVEndpoint(
            settings=self.settings,
            client=self._http_client,
        )

    @cached_property
    def tv_ai(self) -> TVAIEndpoint:
        """Access the TVAI API endpoint.

        Provides methods for AI-enhanced television news analysis.

        Returns:
            TVAIEndpoint instance.

        Raises:
            RuntimeError: If client not initialized (use context manager).

        Example:
            >>> async with GDELTClient() as client:
            ...     result = await client.tv_ai.analyze("election coverage")
        """
        if self._http_client is None:
            msg = "GDELTClient not initialized. Use 'async with GDELTClient() as client:'"
            raise RuntimeError(msg)
        return TVAIEndpoint(
            settings=self.settings,
            client=self._http_client,
        )

    @cached_property
    def lookups(self) -> Lookups:
        """Access lookup tables for CAMEO codes, themes, and countries.

        Provides access to all GDELT lookup tables with lazy loading.

        Returns:
            Lookups instance for code/theme/country lookups.

        Example:
            >>> async with GDELTClient() as client:
            ...     # CAMEO codes
            ...     event_entry = client.lookups.cameo["14"]
            ...     event_name = event_entry.name  # "PROTEST"
            ...
            ...     # GKG themes
            ...     category = client.lookups.themes.get_category("ENV_CLIMATECHANGE")
            ...
            ...     # Country codes
            ...     iso_code = client.lookups.countries.fips_to_iso3("US")  # "USA"
        """
        return Lookups()

events cached property

Access the Events endpoint.

Provides methods for querying GDELT Events data from files or BigQuery.

Returns:

Type Description
EventsEndpoint

EventsEndpoint instance.

Raises:

Type Description
RuntimeError

If client not initialized (use context manager).

Example

async with GDELTClient() as client: ... filter_obj = EventFilter(date_range=DateRange(start=date(2024, 1, 1))) ... events = await client.events.query(filter_obj)

mentions cached property

Access the Mentions endpoint.

Provides methods for querying GDELT Mentions data from files or BigQuery.

Returns:

Type Description
MentionsEndpoint

MentionsEndpoint instance.

Raises:

Type Description
RuntimeError

If client not initialized (use context manager).

Example

async with GDELTClient() as client: ... filter_obj = EventFilter(date_range=DateRange(start=date(2024, 1, 1))) ... mentions = await client.mentions.query("123456789", filter_obj)

gkg cached property

Access the GKG (Global Knowledge Graph) endpoint.

Provides methods for querying GDELT GKG data from files or BigQuery.

Returns:

Type Description
GKGEndpoint

GKGEndpoint instance.

Raises:

Type Description
RuntimeError

If client not initialized (use context manager).

Example

async with GDELTClient() as client: ... filter_obj = GKGFilter( ... date_range=DateRange(start=date(2024, 1, 1)), ... themes=["ENV_CLIMATECHANGE"] ... ) ... records = await client.gkg.query(filter_obj)

ngrams cached property

Access the NGrams endpoint.

Provides methods for querying GDELT NGrams data (files only).

Returns:

Type Description
NGramsEndpoint

NGramsEndpoint instance.

Raises:

Type Description
RuntimeError

If client not initialized (use context manager).

Example

async with GDELTClient() as client: ... filter_obj = NGramsFilter( ... date_range=DateRange(start=date(2024, 1, 1)), ... language="en" ... ) ... records = await client.ngrams.query(filter_obj)

doc cached property

Access the DOC 2.0 API endpoint.

Provides methods for searching GDELT articles via the DOC API.

Returns:

Type Description
DocEndpoint

DocEndpoint instance.

Raises:

Type Description
RuntimeError

If client not initialized (use context manager).

Example

async with GDELTClient() as client: ... articles = await client.doc.search("climate change", max_results=100)

geo cached property

Access the GEO 2.0 API endpoint.

Provides methods for querying geographic locations from news articles.

Returns:

Type Description
GeoEndpoint

GeoEndpoint instance.

Raises:

Type Description
RuntimeError

If client not initialized (use context manager).

Example

async with GDELTClient() as client: ... result = await client.geo.search("earthquake", max_points=100)

context cached property

Access the Context 2.0 API endpoint.

Provides methods for contextual analysis of search terms.

Returns:

Type Description
ContextEndpoint

ContextEndpoint instance.

Raises:

Type Description
RuntimeError

If client not initialized (use context manager).

Example

async with GDELTClient() as client: ... result = await client.context.analyze("climate change")

tv cached property

Access the TV API endpoint.

Provides methods for querying television news transcripts.

Returns:

Type Description
TVEndpoint

TVEndpoint instance.

Raises:

Type Description
RuntimeError

If client not initialized (use context manager).

Example

async with GDELTClient() as client: ... clips = await client.tv.search("climate change", station="CNN")

tv_ai cached property

Access the TVAI API endpoint.

Provides methods for AI-enhanced television news analysis.

Returns:

Type Description
TVAIEndpoint

TVAIEndpoint instance.

Raises:

Type Description
RuntimeError

If client not initialized (use context manager).

Example

async with GDELTClient() as client: ... result = await client.tv_ai.analyze("election coverage")

lookups cached property

Access lookup tables for CAMEO codes, themes, and countries.

Provides access to all GDELT lookup tables with lazy loading.

Returns:

Type Description
Lookups

Lookups instance for code/theme/country lookups.

Example

async with GDELTClient() as client: ... # CAMEO codes ... event_entry = client.lookups.cameo["14"] ... event_name = event_entry.name # "PROTEST" ... ... # GKG themes ... category = client.lookups.themes.get_category("ENV_CLIMATECHANGE") ... ... # Country codes ... iso_code = client.lookups.countries.fips_to_iso3("US") # "USA"

__aenter__() async

Async context manager entry.

Returns:

Type Description
GDELTClient

Self for use in async with statement.

Example

async with GDELTClient() as client: ... events = await client.events.query(filter_obj)

Source code in src/py_gdelt/client.py
async def __aenter__(self) -> GDELTClient:
    """Enter the asynchronous context.

    Awaits ``_initialize()`` and then hands the client itself back so it can
    be bound by the ``as`` clause.

    Returns:
        The same client instance the ``async with`` statement was given.

    Example:
        >>> async with GDELTClient() as client:
        ...     events = await client.events.query(filter_obj)
    """
    startup = self._initialize()
    await startup
    return self

__aexit__(*args) async

Async context manager exit.

Cleans up all owned resources.

Parameters:

Name Type Description Default
*args Any

Exception info (unused, but required by protocol).

()
Source code in src/py_gdelt/client.py
async def __aexit__(self, *args: Any) -> None:
    """Leave the asynchronous context.

    Awaits ``_cleanup()``. The exception triple supplied by the
    ``async with`` protocol is accepted but not inspected.

    Args:
        *args: Exception info (unused, but required by protocol).
    """
    teardown = self._cleanup()
    await teardown

__enter__()

Sync context manager entry.

This provides synchronous (blocking) access to the client for use in non-async code. It uses asyncio.run() internally to manage the event loop.

Important Limitations
  • MUST be called from outside any existing async context/event loop. Calling from within an async function will raise RuntimeError.
  • Creates a new event loop for each context manager entry.
  • Use the async context manager (async with) when possible for better performance and compatibility.

Returns:

Type Description
GDELTClient

Self for use in with statement.

Raises:

Type Description
RuntimeError

If called from within an already running event loop.

Example

Correct: Used from synchronous code

with GDELTClient() as client: ... events = client.events.query_sync(filter_obj) ...

Wrong: Don't use from async code - use 'async with' instead

async def bad_example(): ... with GDELTClient() as client: # RuntimeError! ... pass

Source code in src/py_gdelt/client.py
def __enter__(self) -> GDELTClient:
    """Enter the synchronous (blocking) context.

    Runs ``_initialize()`` to completion through ``asyncio.run()`` so the
    client can be used from code that is not itself asynchronous.

    Important Limitations:
        - Must not be used while an event loop is already running in the
          current thread (for example inside an ``async def``);
          ``asyncio.run()`` raises RuntimeError in that case.
        - Every entry starts its own event loop via ``asyncio.run()``.
        - Prefer ``async with`` whenever the calling code is asynchronous.

    Returns:
        The same client instance the ``with`` statement was given.

    Raises:
        RuntimeError: If called from within an already running event loop.

    Example:
        >>> # Correct: Used from synchronous code
        >>> with GDELTClient() as client:
        ...     events = client.events.query_sync(filter_obj)
        ...
        >>> # Wrong: Don't use from async code - use 'async with' instead
        >>> async def bad_example():
        ...     with GDELTClient() as client:  # RuntimeError!
        ...         pass
    """
    startup = self._initialize()
    asyncio.run(startup)
    return self

__exit__(*args)

Sync context manager exit.

Cleans up all owned resources. Uses asyncio.run() internally.

Parameters:

Name Type Description Default
*args Any

Exception info (unused, but required by protocol).

()

Raises:

Type Description
RuntimeError

If called from within an already running event loop.

Source code in src/py_gdelt/client.py
def __exit__(self, *args: Any) -> None:
    """Leave the synchronous (blocking) context.

    Runs ``_cleanup()`` to completion through ``asyncio.run()``. The
    exception triple supplied by the ``with`` protocol is accepted but not
    inspected.

    Args:
        *args: Exception info (unused, but required by protocol).

    Raises:
        RuntimeError: If called from within an already running event loop.
    """
    teardown = self._cleanup()
    asyncio.run(teardown)

GDELTSettings

Bases: BaseSettings

Configuration settings for the GDELT client library.

Settings can be configured via:

  • Environment variables with GDELT_ prefix (e.g., GDELT_TIMEOUT=60)
  • TOML configuration file passed to config_path parameter
  • Default values

Environment variables take precedence over TOML configuration.

Parameters:

Name Type Description Default
config_path Path | None

Optional path to TOML configuration file. If provided and exists, settings will be loaded from it. Environment variables will override TOML settings.

None
**kwargs Any

Additional keyword arguments for setting field values.

{}

Attributes:

Name Type Description
model_config

Pydantic settings configuration (env prefix, case sensitivity)

bigquery_project str | None

Google Cloud project ID for BigQuery access

bigquery_credentials str | None

Path to Google Cloud credentials JSON file

cache_dir Path

Directory for caching downloaded GDELT data

cache_ttl int

Cache time-to-live in seconds

master_file_list_ttl int

Master file list cache TTL in seconds

max_retries int

Maximum number of HTTP request retries

timeout int

HTTP request timeout in seconds

max_concurrent_requests int

Maximum concurrent HTTP requests

max_concurrent_downloads int

Maximum concurrent file downloads

fallback_to_bigquery bool

Whether to fall back to BigQuery when APIs fail

validate_codes bool

Whether to validate CAMEO/country codes

Example

Using defaults

settings = GDELTSettings()

Loading from TOML file

settings = GDELTSettings(config_path=Path("gdelt.toml"))

Environment variables override TOML

>>> import os
>>> os.environ["GDELT_TIMEOUT"] = "60"
>>> settings = GDELTSettings()
>>> settings.timeout
60

Source code in src/py_gdelt/config.py
class GDELTSettings(BaseSettings):
    """Settings container for the GDELT client library.

    Each value is resolved from the first source that provides it:

    1. Keyword arguments passed to the constructor
    2. Environment variables with the ``GDELT_`` prefix (e.g. ``GDELT_TIMEOUT=60``)
    3. The TOML configuration file named by ``config_path``
    4. The defaults declared on the fields below

    Environment variables therefore take precedence over TOML configuration.

    Args:
        config_path: Optional path to TOML configuration file.
            If provided and exists, settings will be loaded from it.
            Environment variables will override TOML settings.
        **kwargs: Additional keyword arguments for setting field values.

    Attributes:
        model_config: Pydantic settings configuration (env prefix, case sensitivity)
        bigquery_project: Google Cloud project ID for BigQuery access
        bigquery_credentials: Path to Google Cloud credentials JSON file
        cache_dir: Directory for caching downloaded GDELT data
        cache_ttl: Cache time-to-live in seconds
        master_file_list_ttl: Master file list cache TTL in seconds
        max_retries: Maximum number of HTTP request retries
        timeout: HTTP request timeout in seconds
        max_concurrent_requests: Maximum concurrent HTTP requests
        max_concurrent_downloads: Maximum concurrent file downloads
        fallback_to_bigquery: Whether to fallback to BigQuery when APIs fail
        validate_codes: Whether to validate CAMEO/country codes

    Example:
        >>> # Using defaults
        >>> settings = GDELTSettings()

        >>> # Loading from TOML file
        >>> settings = GDELTSettings(config_path=Path("gdelt.toml"))

        >>> # Environment variables override TOML
        >>> import os
        >>> os.environ["GDELT_TIMEOUT"] = "60"
        >>> settings = GDELTSettings()
        >>> settings.timeout
        60
    """

    model_config = SettingsConfigDict(env_prefix="GDELT_", case_sensitive=False, extra="ignore")

    # --- BigQuery (both optional) ---
    bigquery_project: str | None = Field(
        default=None, description="Google Cloud project ID for BigQuery access"
    )
    bigquery_credentials: str | None = Field(
        default=None, description="Path to Google Cloud credentials JSON file"
    )

    # --- Caching ---
    cache_dir: Path = Field(
        default_factory=lambda: Path.home().joinpath(".cache", "gdelt"),
        description="Directory for caching downloaded GDELT data",
    )
    cache_ttl: int = Field(default=3600, description="Cache time-to-live in seconds")
    master_file_list_ttl: int = Field(
        default=300,
        description="Master file list cache TTL in seconds (default 5 minutes)",
    )

    # --- HTTP ---
    max_retries: int = Field(default=3, description="Maximum number of HTTP request retries")
    timeout: int = Field(default=30, description="HTTP request timeout in seconds")
    max_concurrent_requests: int = Field(
        default=10, description="Maximum concurrent HTTP requests"
    )
    max_concurrent_downloads: int = Field(
        default=10, description="Maximum concurrent file downloads"
    )

    # --- Behavior toggles ---
    fallback_to_bigquery: bool = Field(
        default=True, description="Whether to fallback to BigQuery when APIs fail"
    )
    validate_codes: bool = Field(
        default=True, description="Whether to validate CAMEO/country codes"
    )

    # Hand-off slot: __init__ parks config_path here so that the classmethod
    # settings_customise_sources can read it while the parent initializer runs.
    # NOTE(review): this is shared class-level state; overlapping constructions
    # (e.g. from several threads) could observe each other's path - confirm.
    _current_config_path: Path | None = None

    def __init__(self, config_path: Path | None = None, **kwargs: Any) -> None:
        """Build the settings, optionally layering in a TOML configuration file.

        Args:
            config_path: Optional path to TOML configuration file.
            **kwargs: Field values forwarded to the parent initializer.
        """
        # Publish the path on the class for the duration of the parent
        # initializer, which is where settings_customise_sources picks it up.
        GDELTSettings._current_config_path = config_path
        try:
            super().__init__(**kwargs)
        finally:
            # Always clear the slot, even when initialization raises.
            GDELTSettings._current_config_path = None

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,  # noqa: ARG003
        file_secret_settings: PydanticBaseSettingsSource,  # noqa: ARG003
        **_kwargs: Any,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Select and order the settings sources, adding the TOML file.

        Earlier entries in the returned tuple win over later ones:
        1. Init settings (kwargs passed to __init__)
        2. Environment variables (GDELT_ prefix)
        3. TOML configuration file
        4. Default values

        Args:
            settings_cls: The settings class being customized.
            init_settings: Settings from __init__ kwargs.
            env_settings: Settings from environment variables.
            dotenv_settings: Settings from .env file (unused).
            file_secret_settings: Settings from secret files (unused).
            **_kwargs: Additional keyword arguments (unused).

        Returns:
            Tuple of settings sources in priority order.
        """
        # Field defaults are not listed here; Pydantic applies them itself
        # for anything that no source supplies.
        return (
            init_settings,  # explicit kwargs: highest priority
            env_settings,  # then environment variables
            # then the TOML file, using the path parked on the class by __init__
            TOMLConfigSource(settings_cls, config_path=cls._current_config_path),
        )

settings_customise_sources(settings_cls, init_settings, env_settings, dotenv_settings, file_secret_settings, **_kwargs) classmethod

Customize settings sources to include TOML configuration.

The order of sources determines precedence (first source wins):

1. Init settings (kwargs passed to `__init__`)
2. Environment variables (`GDELT_` prefix)
3. TOML configuration file
4. Default values

Parameters:

Name Type Description Default
settings_cls type[BaseSettings]

The settings class being customized.

required
init_settings PydanticBaseSettingsSource

Settings from `__init__` kwargs.

required
env_settings PydanticBaseSettingsSource

Settings from environment variables.

required
dotenv_settings PydanticBaseSettingsSource

Settings from .env file (unused).

required
file_secret_settings PydanticBaseSettingsSource

Settings from secret files (unused).

required
**_kwargs Any

Additional keyword arguments (unused).

{}

Returns:

Type Description
tuple[PydanticBaseSettingsSource, ...]

Tuple of settings sources in priority order.

Source code in src/py_gdelt/config.py
@classmethod
def settings_customise_sources(
    cls,
    settings_cls: type[BaseSettings],
    init_settings: PydanticBaseSettingsSource,
    env_settings: PydanticBaseSettingsSource,
    dotenv_settings: PydanticBaseSettingsSource,  # noqa: ARG003
    file_secret_settings: PydanticBaseSettingsSource,  # noqa: ARG003
    **_kwargs: Any,
) -> tuple[PydanticBaseSettingsSource, ...]:
    """Select and order the settings sources, adding the TOML file.

    Earlier entries in the returned tuple win over later ones:
    1. Init settings (kwargs passed to __init__)
    2. Environment variables (GDELT_ prefix)
    3. TOML configuration file
    4. Default values

    Args:
        settings_cls: The settings class being customized.
        init_settings: Settings from __init__ kwargs.
        env_settings: Settings from environment variables.
        dotenv_settings: Settings from .env file (unused).
        file_secret_settings: Settings from secret files (unused).
        **_kwargs: Additional keyword arguments (unused).

    Returns:
        Tuple of settings sources in priority order.
    """
    # The TOML path is read from the class attribute that __init__ sets
    # for the duration of the parent initializer.
    toml_settings = TOMLConfigSource(settings_cls, config_path=cls._current_config_path)

    # Field defaults are not listed; Pydantic applies them itself for
    # anything that no source supplies.
    priority_order = (init_settings, env_settings, toml_settings)
    return priority_order