Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion backend/src/api/wikibase/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import {
InstanceId,
OAuthCredentials,
PropertySearchResultSchema,
QuerySchema,
Term,
Expand All @@ -10,7 +11,7 @@ import { wikibasePlugin } from '@backend/plugins/wikibase'
import { constraintValidationService } from '@backend/services/constraint-validation.service'
import { ApiErrorHandler } from '@backend/types/error-handler'
import { ApiErrors } from '@backend/types/error-schemas'
import { PropertyId, WikibaseDataType, ItemId } from '@backend/types/wikibase-schema'
import { ItemId, PropertyId, WikibaseDataType } from '@backend/types/wikibase-schema'
import { cors } from '@elysiajs/cors'
import { Elysia, t } from 'elysia'

Expand All @@ -27,6 +28,33 @@ export const wikibaseEntitiesApi = new Elysia({ prefix: '/api/wikibase' })
}),
})

.post(
'/:instanceId/csrf-token',
async ({ params: { instanceId }, body: { credentials }, wikibase }) => {
const {
query: {
tokens: { csrftoken },
},
} = await wikibase.getCsrfToken(instanceId, credentials)
return {
token: csrftoken,
}
},
{
body: t.Object({
credentials: OAuthCredentials,
}),
response: t.Object({
token: t.String(),
}),
detail: {
summary: 'Get CSRF token',
description: 'Get a CSRF token for a Wikibase instance using OAuth credentials',
tags: ['Wikibase'],
},
},
)

.post(
'/:instanceId/properties/fetch',
async ({ params: { instanceId }, wikibase, db }) => {
Expand Down
31 changes: 27 additions & 4 deletions backend/src/api/wikibase/schemas.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,31 @@
import { WikibaseDataType } from '@backend/types/wikibase-schema'
import { t } from 'elysia'

/**
 * Schema for a set of OAuth credentials made of four string fields:
 * a consumer key/secret pair and an access token/secret pair.
 */
export const OAuthCredentials = t.Object({
consumerKey: t.String({
description: 'Consumer key',
}),
consumerSecret: t.String({
description: 'Consumer secret',
}),
accessToken: t.String({
description: 'Access token',
}),
accessTokenSecret: t.String({
description: 'Access secret',
}),
})
/** Static TypeScript type derived from the `OAuthCredentials` schema. */
export type OAuthCredentials = typeof OAuthCredentials.static

/**
 * Schema for a CSRF token response: the token is a string nested at
 * `query.tokens.csrftoken`.
 */
export const CSRFTokenResponse = t.Object({
query: t.Object({
tokens: t.Object({
csrftoken: t.String(),
}),
}),
})
/** Static TypeScript type derived from the `CSRFTokenResponse` schema. */
export type CSRFTokenResponse = typeof CSRFTokenResponse.static

export const Term = t.Union([t.Literal('label'), t.Literal('alias'), t.Literal('description')])
export type Term = typeof Term.static

Expand All @@ -20,10 +45,8 @@ export const PropertySearchResultSchema = t.Object({
})
export type PropertySearchResult = typeof PropertySearchResultSchema.static

export const InstanceId = t.String({
description: 'Wikibase instance ID',
default: 'wikidata',
})
export const InstanceId = t.Union([t.Literal('wikidata'), t.Literal('commons')])
export type InstanceId = typeof InstanceId.static

export const QuerySchema = t.Object({
q: t.String({
Expand Down
7 changes: 4 additions & 3 deletions backend/src/services/constraint-validation.service.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import type { InstanceId } from '@backend/api/wikibase/schemas'
import { wikibaseService } from '@backend/services/wikibase.service'
import type {
ConstraintViolation,
Expand All @@ -15,7 +16,7 @@ export class ConstraintValidationService {
* Get constraints for a property using MediaWiki API
*/
async getPropertyConstraints(
instanceId: string,
instanceId: InstanceId,
propertyId: PropertyId,
): Promise<PropertyConstraint[]> {
const cacheKey = `${instanceId}:${propertyId}`
Expand Down Expand Up @@ -394,7 +395,7 @@ export class ConstraintValidationService {
* Validate a property value against its constraints
*/
async validateProperty(
instanceId: string,
instanceId: InstanceId,
propertyId: PropertyId,
values: any[],
): Promise<ValidationResult> {
Expand Down Expand Up @@ -484,7 +485,7 @@ export class ConstraintValidationService {
* Validate an entire schema against property constraints
*/
async validateSchema(
instanceId: string,
instanceId: InstanceId,
schema: Record<string, unknown[]>,
): Promise<ValidationResult> {
const allViolations: ConstraintViolation[] = []
Expand Down
5 changes: 2 additions & 3 deletions backend/src/services/mediawiki-api.service.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { ApiResponse, LoginResponse, MediaWikiConfig } from '@backend/types/mediawiki-api'

export class MediaWikiApiService {
private config: MediaWikiConfig
public config: MediaWikiConfig
private tokens = new Map<string, string>()

constructor(config: MediaWikiConfig) {
Expand Down Expand Up @@ -58,8 +58,7 @@ export class MediaWikiApiService {
const options: RequestInit = {
method,
headers: {
'User-Agent':
this.config.userAgent || 'DataForge/1.0 (https://github.com/DaxServer/dataforge)',
'User-Agent': this.config.userAgent,
},
}

Expand Down
85 changes: 63 additions & 22 deletions backend/src/services/wikibase-clients.ts
Original file line number Diff line number Diff line change
@@ -1,43 +1,84 @@
import { type CSRFTokenResponse, InstanceId, OAuthCredentials } from '@backend/api/wikibase/schemas'
import { MediaWikiApiService } from '@backend/services/mediawiki-api.service'
import type { MediaWikiConfig } from '@backend/types/mediawiki-api'
import OAuth from 'oauth'

export class WikibaseClient {
private clients: Record<string, MediaWikiApiService> = {
private readonly endpoints: Record<InstanceId, string> = {
wikidata: 'https://www.wikidata.org/w/api.php',
commons: 'https://commons.wikimedia.org/w/api.php',
}

private clients: Record<InstanceId, MediaWikiApiService> = {
wikidata: new MediaWikiApiService({
endpoint: 'https://www.wikidata.org/w/api.php',
endpoint: this.endpoints.wikidata,
userAgent: 'DataForge/1.0 (https://github.com/DaxServer/dataforge)',
timeout: 30000,
}),
commons: new MediaWikiApiService({
endpoint: this.endpoints.commons,
userAgent: 'DataForge/1.0 (https://github.com/DaxServer/dataforge)',
timeout: 30000,
}),
}

createClient(id: string, config: MediaWikiConfig): MediaWikiApiService {
const client = new MediaWikiApiService({
endpoint: config.endpoint,
userAgent: config.userAgent,
timeout: config.timeout,
token: config.token,
})

this.clients[id] = client
private credentials: Partial<Record<InstanceId, OAuthCredentials>> = {}
private authenticatedClients: Partial<Record<InstanceId, OAuth.OAuth>> = {}
Comment on lines +24 to +25
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Do not retain OAuth credentials in memory.

this.credentials stores secrets beyond the request lifetime and gets overwritten per instance; unnecessary and a privacy/compliance risk. Remove it; pass creds per call only (you already do).

Apply:

-  private credentials: Partial<Record<InstanceId, OAuthCredentials>> = {}
   private authenticatedClients: Partial<Record<InstanceId, OAuth.OAuth>> = {}
...
-      this.credentials[instanceId] = credentials

Also applies to: 38-38


return client
}

getClient(instanceId: string): MediaWikiApiService {
getClient(instanceId: InstanceId): MediaWikiApiService {
const client = this.clients[instanceId]
if (!client) {
throw new Error(`No client found for instance: ${instanceId}`)
}
return client
}

hasClient(instanceId: string): boolean {
return this.clients[instanceId] !== undefined
getAuthenticatedClient(instanceId: InstanceId, credentials: OAuthCredentials): OAuth.OAuth {
if (!(instanceId in this.authenticatedClients)) {
const client = this.getClient(instanceId)
this.credentials[instanceId] = credentials
this.authenticatedClients[instanceId] = new OAuth.OAuth(
this.endpoints[instanceId],
this.endpoints[instanceId],
Comment on lines +39 to +41
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Use MediaWiki OAuth1 endpoints, not the Action API URL.

OAuth.OAuth’s first two args must be the request/access token endpoints. Using /w/api.php is wrong (works only by accident because you’re not calling token flows). Also set a request timeout on the OAuth client.

Apply:

-      this.authenticatedClients[instanceId] = new OAuth.OAuth(
-        this.endpoints[instanceId],
-        this.endpoints[instanceId],
+      const base = new URL(this.endpoints[instanceId]).origin
+      const requestTokenUrl = `${base}/w/index.php?title=Special:OAuth/initiate`
+      const accessTokenUrl  = `${base}/w/index.php?title=Special:OAuth/token`
+      const oa = new OAuth.OAuth(
+        requestTokenUrl,
+        accessTokenUrl,
         credentials.consumerKey,
         credentials.consumerSecret,
         '1.0',
         null,
         'HMAC-SHA1',
         undefined,
         {
           'User-Agent': client.config.userAgent,
         },
       )
+      // Ensure outbound calls don’t hang indefinitely
+      oa.setClientOptions?.({ timeout: 30000 })
+      this.authenticatedClients[instanceId] = oa

Based on learnings.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
this.authenticatedClients[instanceId] = new OAuth.OAuth(
this.endpoints[instanceId],
this.endpoints[instanceId],
const base = new URL(this.endpoints[instanceId]).origin
const requestTokenUrl = `${base}/w/index.php?title=Special:OAuth/initiate`
const accessTokenUrl = `${base}/w/index.php?title=Special:OAuth/token`
const oa = new OAuth.OAuth(
requestTokenUrl,
accessTokenUrl,
credentials.consumerKey,
credentials.consumerSecret,
'1.0',
null,
'HMAC-SHA1',
undefined,
{
'User-Agent': client.config.userAgent,
},
)
// Ensure outbound calls don’t hang indefinitely
oa.setClientOptions?.({ timeout: 30000 })
this.authenticatedClients[instanceId] = oa

credentials.consumerKey,
credentials.consumerSecret,
'1.0',
null,
'HMAC-SHA1',
undefined,
{
'User-Agent': client.config.userAgent,
},
)
}
return this.authenticatedClients[instanceId] as OAuth.OAuth
}

removeClient(instanceId: string): boolean {
const clientRemoved = this.clients[instanceId] !== undefined
delete this.clients[instanceId]
return clientRemoved
async getCsrfToken(
instanceId: InstanceId,
credentials: OAuthCredentials,
): Promise<CSRFTokenResponse> {
const authService = this.getAuthenticatedClient(instanceId, credentials)

return new Promise((resolve, reject) => {
authService.get(
this.endpoints[instanceId] +
'?' +
new URLSearchParams({
action: 'query',
meta: 'tokens',
format: 'json',
}).toString(),
credentials.accessToken,
credentials.accessTokenSecret,
(err, data) => {
if (err) {
console.error(err)
reject(err)
return
}
resolve(JSON.parse(data as string) as CSRFTokenResponse)
},
)
})
}
Comment on lines +56 to 83
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Be explicit with type=csrf and harden JSON parsing.

Add type=csrf to the query to avoid provider defaults changing; also guard JSON.parse errors and validate shape.

Apply:

       authService.get(
         this.endpoints[instanceId] +
           '?' +
           new URLSearchParams({
             action: 'query',
             meta: 'tokens',
+            type: 'csrf',
             format: 'json',
           }).toString(),
         credentials.accessToken,
         credentials.accessTokenSecret,
         (err, data) => {
           if (err) {
             console.error(err)
             reject(err)
             return
           }
-          resolve(JSON.parse(data as string) as CSRFTokenResponse)
+          try {
+            const parsed = JSON.parse(data as string) as CSRFTokenResponse
+            if (!parsed?.query?.tokens?.csrftoken) {
+              return reject(new Error('Missing csrftoken in response'))
+            }
+            resolve(parsed)
+          } catch (e) {
+            reject(e)
+          }
         },
       )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
async getCsrfToken(
instanceId: InstanceId,
credentials: OAuthCredentials,
): Promise<CSRFTokenResponse> {
const authService = this.getAuthenticatedClient(instanceId, credentials)
return new Promise((resolve, reject) => {
authService.get(
this.endpoints[instanceId] +
'?' +
new URLSearchParams({
action: 'query',
meta: 'tokens',
format: 'json',
}).toString(),
credentials.accessToken,
credentials.accessTokenSecret,
(err, data) => {
if (err) {
console.error(err)
reject(err)
return
}
resolve(JSON.parse(data as string) as CSRFTokenResponse)
},
)
})
}
async getCsrfToken(
instanceId: InstanceId,
credentials: OAuthCredentials,
): Promise<CSRFTokenResponse> {
const authService = this.getAuthenticatedClient(instanceId, credentials)
return new Promise((resolve, reject) => {
authService.get(
this.endpoints[instanceId] +
'?' +
new URLSearchParams({
action: 'query',
meta: 'tokens',
type: 'csrf',
format: 'json',
}).toString(),
credentials.accessToken,
credentials.accessTokenSecret,
(err, data) => {
if (err) {
console.error(err)
reject(err)
return
}
try {
const parsed = JSON.parse(data as string) as CSRFTokenResponse
if (!parsed?.query?.tokens?.csrftoken) {
return reject(new Error('Missing csrftoken in response'))
}
resolve(parsed)
} catch (e) {
reject(e)
}
},
)
})
}
🤖 Prompt for AI Agents
In backend/src/services/wikibase-clients.ts around lines 56 to 83, the query
that requests tokens should explicitly include type=csrf and the JSON parsing
must be hardened: add type: 'csrf' to the URLSearchParams; wrap JSON.parse in a
try/catch and reject with a descriptive error if parsing fails; after parsing,
validate the parsed object has the expected shape (i.e., a string at
query.tokens.csrftoken, as CSRFTokenResponse requires) and reject if
validation fails; keep returning the parsed CSRFTokenResponse on success.

}
15 changes: 9 additions & 6 deletions backend/src/services/wikibase.service.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { PropertySearchResult } from '@backend/api/wikibase/schemas'
import type { InstanceId, PropertySearchResult } from '@backend/api/wikibase/schemas'
import { WikibaseClient } from '@backend/services/wikibase-clients'
import type {
WikibaseGetEntitiesResponse,
Expand Down Expand Up @@ -32,7 +32,7 @@ interface AllPagesResponse {

export class WikibaseService extends WikibaseClient {
async fetchAllProperties(
instanceId: string,
instanceId: InstanceId,
db: DuckDBConnection,
): Promise<{ total: number; inserted: number }> {
let total = 0
Expand Down Expand Up @@ -95,7 +95,7 @@ export class WikibaseService extends WikibaseClient {
* Search for properties in the specified Wikibase instance
*/
async searchProperties(
instanceId: string,
instanceId: InstanceId,
query: string,
options: SearchOptions,
): Promise<SearchResponse<PropertySearchResult>> {
Expand Down Expand Up @@ -140,7 +140,10 @@ export class WikibaseService extends WikibaseClient {
/**
* Get property details by ID
*/
async getProperty(instanceId: string, propertyId: PropertyId): Promise<PropertyDetails | null> {
async getProperty(
instanceId: InstanceId,
propertyId: PropertyId,
): Promise<PropertyDetails | null> {
const client = this.getClient(instanceId)

const params = {
Expand Down Expand Up @@ -190,7 +193,7 @@ export class WikibaseService extends WikibaseClient {
* Search for items in the specified Wikibase instance
*/
async searchItems(
instanceId: string,
instanceId: InstanceId,
query: string,
options: SearchOptions,
): Promise<SearchResponse<ItemSearchResult>> {
Expand Down Expand Up @@ -243,7 +246,7 @@ export class WikibaseService extends WikibaseClient {
/**
* Get item details by ID
*/
async getItem(instanceId: string, itemId: ItemId): Promise<ItemDetails | null> {
async getItem(instanceId: InstanceId, itemId: ItemId): Promise<ItemDetails | null> {
const client = this.getClient(instanceId)

const params = {
Expand Down
2 changes: 1 addition & 1 deletion backend/src/types/mediawiki-api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { t } from 'elysia'
// Base API Configuration
export interface MediaWikiConfig {
endpoint: string
userAgent?: string
userAgent: string
timeout?: number
username?: string
password?: string
Expand Down
3 changes: 2 additions & 1 deletion backend/tests/api/project/project.wikibase.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import {
type Label,
type WikibaseCreateSchema,
} from '@backend/api/project/project.wikibase'
import type { InstanceId } from '@backend/api/wikibase/schemas'
import { closeDb, databasePlugin, getDb, initializeDb } from '@backend/plugins/database'
import type { ItemId } from '@backend/types/wikibase-schema'
import { treaty } from '@elysiajs/eden'
Expand Down Expand Up @@ -385,7 +386,7 @@ describe('Wikibase API', () => {
})

const updateData = {
wikibase: 'commons',
wikibase: 'commons' as InstanceId,
schema: schema,
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import type { WikibaseDataType } from '@backend/types/wikibase-schema'
import { useColumnDataTypeIndicators } from '@frontend/features/data-processing/composables/useColumnDataTypeIndicators'
import type { ColumnInfo } from '@frontend/shared/types/wikibase-schema'
import { readonly, ref } from 'vue'
Expand Down
Loading