Marklogic

For a recap on what-is-marklogic-server, see this article.

Documents

Add Documents

Using Javascript: From Files

'use strict';
declareUpdate(); // Mandatory to edit resources in MarkLogic
xdmp.documentLoad("path/file.json"); // Keep Uri as path/file.json
xdmp.documentLoad("path/file.json", {'uri': 'NewUriPath'}); // Change Uri to NewUriPath

/*Load Bulk documents*/
const pathToDocs = '/home/Docs';
const docResults =  xdmp.filesystemDirectory(pathToDocs);
docResults.forEach(function(doc){ xdmp.documentLoad(doc.pathname, {'uri':'/patents/'+doc.filename})});

Using Javascript: Document created in memory ( form, response to api, etc …)

'use strict';
declareUpdate(); 
xdmp.documentInsert("uri",
 {
    "title": "my title",
    "description": "This is a description"
 });

Using Rest API

curl -X PUT 
--anyauth 
--user admin:admin 
-i
-H "Content-Type:application/json" 
-d '{
      "patent":
      {
        "title": "title", 
        "description": " ..."
      }
    }'
"http://localhost:8060/v1/documents?uri=/patent/file.json"

Delete Documents

'use strict';
declareUpdate();
xdmp.documentDelete("uri");

Read Documents

Using javascript

'use strict';
cts.doc("uri");

Using Rest API

curl -X GET 
--anyauth 
--user admin:admin 
"http://localhost:8060/v1/documents?uri=/patent/file.json"

Modify Documents

'use strict';
declareUpdate();
const builder =  new NodeBuilder();
const node=  builder.addText("....").toNode();
xdmp.nodeReplace(cts.doc("uri").xpath("/patent/description"), node);

Directories & Collections

Directories

Come from document URIs. MarkLogic stores data in forests. Actually, there are no directories created in the filesystem; however, as a developer, one can treat URIs as paths to access directories & subdirectories.

xdmp.directory('/directoryA/');

Collections

Are more like tags for documents (for instance, hashtags on Twitter). Not mandatory

'use strict';
xdmp.documentAddCollection('uri', ['collection1', 'collection2']);
xdmp.documentRemoveCollection('uri', ['collection1']);
fn.collection('collection1') // Get all documents in collection1

Permissions & Roles

Add Permissions to Documents

'use strict';
declareUpdate();
/* Create Document & Assign role*/
xdmp.documentLoad("path/file.json", 
    {
        'uri': 'NewUriPath',
        'collections' : ['collection1', 'collection2'],
        'permissions':
            [
                xdmp.permission('patents-user-role', 'read'),
                xdmp.permission('patents-user-role', 'update'),
            ]
    });

/* Assign role to Existing Document*/
xdmp.documentAddPermissions("path/file.json",
            [
                xdmp.permission('patents-user-role', 'read'),
                xdmp.permission('patents-user-role', 'update'),
            ]
    );

Role

Create Role Via Javascript

'use strict';
declareUpdate();
const sec = require('/MarkLogic/security.xqy');
sec.createRole(
        'patents-user-role',
        'Role for ...',
        [],
        null,
        null
    );

Create Role Via the Admin Interface

curl -X POST 
--anyauth 
-u admin:admin 
-H "Content-Type:application/json" 
-d '{
      "role-name":"patents-user-role", 
      "description": "role for ..."
    }'
http://localhost:8002/manage/v2/roles

Users

Create User Via Javascript

'use strict';
declareUpdate();
const sec = require('/MarkLogic/security.xqy');
sec.createUser(
        'patents-user',
        'User for ...',
        'user',
        'patents-user-role',
        null,
        null
    );

Create User Via the Admin Interface

curl -X POST 
--anyauth 
-u admin:admin 
-H "Content-Type:application/json" 
-d '{
      "user-name":"patents-user", 
      "password":"user",
       "role":["patents-user-role"]
    }'
http://localhost:8002/manage/v2/users

Execute code as another user

'use strict';
const admin = require ('/MarkLogic/admin.xqy');
xdmp.invokeFunction(
    function(){return cts.doc('uri');},
    {'userId': xdmp.user('admin')
})

MLCP – MarkLogic Content Pump

Bulk Upload Documents

  • To upload Triples: -input_file_type RDF
mlcp.sh (.bat) import -host localhost -port 8060 -username admin -password admin -mode local 
-input_file_path C:\path\abc 
-output_uri_replace "C:\path\abc, 'abc'" 
-output_permissions patents-user-role,read,patents-admin-role,update

Split a large Document

  • To split a large document into individual documents: -input_file_type aggregates
  • Tell Marklogic which element to use for designating a new document: -aggregate_record_element
<contacts>
    <contact>
        <id>
        ...
    </contact>
    <contact>
        <id>
        ...
    </contact>
</contacts>
mlcp.sh (.bat) import -host localhost -port 8060 -username admin -password admin -mode local 
-input_file_path C:\path\contacts.xml
-input_file_type aggregates
-aggregate_record_element contact
-output_uri_prefix /path/contact
-uri_id id 
-output_uri_suffix .xml
-output_collections 'raw'

Load Documents from CSV files

  • To upload from CSV files: -input_file_type delimited_text
mlcp.sh (.bat) import -host localhost -port 8060 -username admin -password admin -mode local 
-input_file_path C:\path\contacts.csv
-input_file_type delimited_text
-output_uri_prefix /path/contact
-output_uri_suffix .json
-document_type json
-output_collections 'raw'

Transform Data While Loading

  • To transform Data While Loading: -transform_module
mlcp.sh (.bat) import -host localhost -port 8060 -username admin -password admin -mode local 
-input_file_path C:\path\contacts.xml
-transform_module /mlcp-transfer-something.xqy (.sjs) #Write the transformation code
-transform_namespace "http://marklogic.com/.../transform-something"
-transform_param "value"
-output_uri_replace "C:\path\abc, 'abc'" 

SQL

  • See the Optic API for better queries.

To make SQL queries against MarkLogic databases:
1. define a Template.

// JSON example
 {
  "id": 11,
  "name" : "my name",
  "role" : "role"
 }

const tde = require ('/MarkLogic/tde.xqy');
const template = xdmp.toJSON (
    {
        'template':{
            'context':'/id', // Looks for all the documents that have id in the top level property (node)
            'rows':[
                {
                    'schemaName':'TheSchemaName',
                    'viewName':'TheViewName',
                    'columns':[
                        {
                            'name':'...',
                            'scalarType':'string',
                            'val':'../name', //It is like going back to parent & accessing name in the example json above
                            'nullable':true
                        },
                        {
                            'name':'...',
                            'scalarType':'string',
                            'val':'../name', //It is like going back to parent & accessing name in the example json above
                            'nullable':true
                        },
                    ]
                }
            ]
        }

    }
);
// See Schema 
tde.getView("TheSchemaName", "TheViewName")

  1. Define an ODBC server port
  2. Install the Marklogic ODBC driver
  3. Insert the template. A view is generated containing rows that SQL queries can be run against
  4. Query
SELECT * FROM TheViewName

Triples

SELECT ?var1 ?var2
WHERE {
        <http://marklogic/.../Han_solo> <http://marklogic/.../playedBy> ?var1 . # the dot (.) is like AND
        ?var1 <http://marklogic/.../birthdate> ?var2
        }


Data

Modify Data

Add Envelope

(: Use an envelope on data to harmonize it. :)
let $envelope := ...
return xdmp:document-insert("/path/....xml", $envelope, (), "collection1")

Add Node to Envelope

xdmp:node-insert-before(fn:doc("/path/....xml")/envelope/source,<c></c>) (: Add <c> before source :)

Add Node Child in Node

let $doc := fn:doc("/path/....xml")
let $company := $doc/envelope/source/contact/company/text()
return xdmp:node-insert-child($doc/envelope/source/c, <b>{$company}</b>) (: Add <b> as a child of c :)

Query an Envelope

for $c in /envelope/c[company = "my company"]
return concat($c/firstName/text(), "", $c/lastName/text())

Batch Process

CoRB – Content Reprocessing in Bulk

With Gradle

With CLI

corb.sh (.bat) myFile.properties

# myFile.properties
# XCC-USERNAME=admin
# XCC-PASSWORD=admin
# XCC-HOSTNAME=localhost
# XCC-DBNAME=db
# THREAD-COUNT=4
# URIS-MODULE=file_uris.xqy|ADHOC  // ADHOC means: look for file_uris.xqy in the location where the .properties file is
# PROCESS-MODULE=file_transformation.xqy|ADHOC

Data Movement SDK

Load & transform large numbers of documents (MLCP+CoRB). Works with Java

Written by

Albert Oplog

Hi, I'm Albert Oplog. I would humbly like to share my tech journey with people all around the world.