x-templates: # `x-templates` field contains reusable templates for metadata extraction services that user can define under `services` field. # See docker-compose docs on re-using configuration fragments: https://docs.docker.com/compose/compose-file/compose-file-v3/#extension-fields crawler-script: &crawler-script | # Download JDBC jars if needed if [[ $$DOWNLOAD_JDBC == true ]] && ! [ -f "$$DOWNLOAD_JDBC_DST" ]; then echo "Downloading JDBC drivers from URL: $$DOWNLOAD_JDBC_URL" mkdir -p "$$(dirname $$DOWNLOAD_JDBC_DST)" curl "$$DOWNLOAD_JDBC_URL" -o "$$DOWNLOAD_JDBC_DST" if [[ $$DOWNLOAD_JDBC_DST == *.tar ]] || [[ $$DOWNLOAD_JDBC_DST == *.tar.gz ]]; then tar -xvf "$$DOWNLOAD_JDBC_DST" -C "$$(dirname $$DOWNLOAD_JDBC_DST)" --transform='s/.*\///' fi fi # Load the env variables from credential secret file if it exists if [ -n "$$CREDENTIAL_SECRET_PATH" ]; then # Use jq to read each key-value pair and export them as environment variables while IFS="=" read -r key value; do # Exporting each key-value pair as an environment variable echo "Setting environment variable $$key from credential secret" export "$$key=$$value" done < <(jq -r 'to_entries|map("\(.key)=\(.value)")|.[]' $$CREDENTIAL_SECRET_PATH) fi export URL="$$(bash -c "echo \"$$URL_TEMPLATE\"")" export EXTRA_PROP="$$(bash -c "echo \"$$EXTRA_PROP_TEMPLATE\"")" export DEFAULT_FILTER="{}" export INCLUDE_FILTER="$$(bash -c "echo '$${INCLUDE_FILTER_TEMPLATE:=$$DEFAULT_FILTER}'")" export EXCLUDE_FILTER="$$(bash -c "echo '$${EXCLUDE_FILTER_TEMPLATE:=$$DEFAULT_FILTER}'")" export USE_SOURCE_SCHEMA_FILTERING="$$(bash -c "echo '$${USE_SOURCE_SCHEMA_FILTERING:=false}'")" echo "Source schema filtering set to $$USE_SOURCE_SCHEMA_FILTERING" echo "Exclude table types set to $$EXCLUDE_TABLE_TYPES" echo "Has database abstraction set to $$HAS_DATABASE_ABSTRACTION" export USE_JDBC_INTERNAL_METHODS="$$(bash -c "echo '$${USE_JDBC_INTERNAL_METHODS:=true}'")" echo "JDBC Internal Methods set to $$USE_JDBC_INTERNAL_METHODS" echo $$EXCLUDE_FILTER echo $$INCLUDE_FILTER echo $$EXTRA_PROP java \ -cp \ /jdbc-metadata-extractor.jar:/jars/* \ MainKt \ --class-name "$$DRIVER" \ --url "$$URL" \ --extra-prop "$$EXTRA_PROP" \ --runtime-prop "$$RUNTIME_PROPS" \ --include-filter "$$INCLUDE_FILTER" \ --ex-filter "$$EXCLUDE_FILTER" \ --use-source-schema-filtering "$$USE_SOURCE_SCHEMA_FILTERING" \ --use-jdbc-internal-methods "$$USE_JDBC_INTERNAL_METHODS" \ --system-schema-regex "$$SYSTEM_SCHEMA_REGEX" \ --temp-table-regex "$$TEMP_TABLE_REGEX" \ --sys-database-regex "$$SYSTEM_DATABASE_REGEX" \ --output-prefix /output \ metadata \ --fetch-primary-keys "$$FETCH_PRIMARY_KEYS" \ --exclude-table-types "$$EXCLUDE_TABLE_TYPES" \ --fetch-imported-keys "$$FETCH_IMPORTED_KEYS" \ --table-companion-sql "$$TABLE_COMPANION_SQL" \ --column-companion-sql "$$COLUMN_COMPANION_SQL" \ --dynamic-query-config "$$DYNAMIC_QUERY_CONFIG" \ --extra-companion-sqls "$$EXTRA_COMPANION_SQL" \ --database-companion-sql "$$DATABASE_COMPANION_SQL" \ --default-database-name "$$DEFAULT_DATABASE_NAME" \ --default-schema-name "$$DEFAULT_SCHEMA_NAME" \ --has-database-abstraction "$$HAS_DATABASE_ABSTRACTION" \ --schema-companion-sql "$$SCHEMA_COMPANION_SQL" # Hive crawler script hive-crawler-script: &hive-crawler-script | # Download JDBC jars if needed if [[ $$DOWNLOAD_JDBC == true ]] && ! [ -f "$$DOWNLOAD_JDBC_DST" ]; then echo "Downloading JDBC drivers from URL: $$DOWNLOAD_JDBC_URL" mkdir -p "$$(dirname $$DOWNLOAD_JDBC_DST)" curl "$$DOWNLOAD_JDBC_URL" -o "$$DOWNLOAD_JDBC_DST" if [[ $$DOWNLOAD_JDBC_DST == *.tar ]] || [[ $$DOWNLOAD_JDBC_DST == *.tar.gz ]]; then tar -xvf "$$DOWNLOAD_JDBC_DST" -C "$$(dirname $$DOWNLOAD_JDBC_DST)" --transform='s/.*\///' fi fi # Load the env variables from credential secret file if it exists if [ -f $$CREDENTIAL_SECRET_PATH ]; then # Use jq to read each key-value pair and export them as environment variables while IFS="=" read -r key value; do # Exporting each key-value pair as an environment variable echo "Setting environment variable $$key from credential secret" export "$$key=$$value" done < <(jq -r 'to_entries|map("\(.key)=\(.value)")|.[]' $$CREDENTIAL_SECRET_PATH) fi export URL="$$(bash -c "echo \"$$URL_TEMPLATE\"")" export EXTRA_PROP="$$(bash -c "echo \"$$EXTRA_PROP_TEMPLATE\"")" echo $$EXTRA_PROP mkdir /output/mine mkdir /tmp/sqls # Add database echo {\"TABLE_CAT\":\"Hive\"} >> /output/mine/databases.json # List schemas java \ -cp \ /jdbc-metadata-extractor.jar:/jars/* \ MainKt \ --class-name "$$DRIVER" \ --url "$$URL" \ --extra-prop "$$EXTRA_PROP" \ --runtime-prop "$$RUNTIME_PROPS" \ --output-prefix /output/list_schemas/mine \ --output-type JSON \ mine \ --sql "show schemas" \ --input-parent-directory "/tmp" \ --marker-file-path /tmp/marker/mine \ --ore-filename-prefix "unfiltered_schemas" echo "Getting other dependencies" # Get jq apt-get update > /dev/null apt-get install jq -y -q > /dev/null # Filter and get schema details file=$$(cat /output/list_schemas/mine/unfiltered_schemas.json) i=0 for line in $$file do schema_name=$$( jq -r ".DATABASE_NAME" <<< "$$line" ) if [[ $$schema_name =~ $${SCHEMA_EXCLUDE_REGEX} ]]; then echo "Skipping schema $$schema_name due to exclude regex $${SCHEMA_EXCLUDE_REGEX}" continue elif ! [[ $$schema_name =~ $${SCHEMA_INCLUDE_REGEX} ]]; then echo "Skipping schema $$schema_name due to include regex $${SCHEMA_INCLUDE_REGEX}" continue fi # generate schema related sqls echo "describe schema extended \`$$schema_name\`" >> /tmp/sqls/describe_schemas_$$i.sql # list all tables in schema java \ -cp \ /jdbc-metadata-extractor.jar:/jars/* \ MainKt \ --class-name "$$DRIVER" \ --url "$$URL" \ --extra-prop "$$EXTRA_PROP" \ --runtime-prop "$$RUNTIME_PROPS" \ --output-prefix /tmp/tables_per_schema/mine \ --output-type JSON \ mine \ --sql "show tables in \`$$schema_name\`" \ --input-parent-directory "/tmp" \ --marker-file-path /tmp/marker/mine \ --ore-filename-prefix "tables-$$i" # generate describe table sqls table_index=0 table_file=$$(cat /tmp/tables_per_schema/mine/tables-$$i.json) for table_file_line in $$table_file do table_name=$$( jq -r ".TAB_NAME" <<< "$$table_file_line" ) if [[ $$table_name =~ $${TEMP_TABLE_REGEX} ]]; then echo "Skipping table $$table_name due to exclude regex $${TEMP_TABLE_REGEX}" continue fi echo "describe extended \`$$schema_name\`.\`$$table_name\`" >> /tmp/sqls/describe_tables-$${i}_$${table_index}.sql table_index=$$(( table_index + 1 )) done i=$$(( i + 1 )) done # describe each schema java \ -cp \ /jdbc-metadata-extractor.jar:/jars/* \ MainKt \ --class-name "$$DRIVER" \ --url "$$URL" \ --extra-prop "$$EXTRA_PROP" \ --runtime-prop "$$RUNTIME_PROPS" \ --output-prefix /output/mine \ --output-type JSON \ mine \ --input-parent-directory "/tmp/sqls" \ --sql-files-pattern "describe_schemas_*.sql" \ --marker-file-path /tmp/marker/mine \ --ore-filename-prefix "schemas" # describe each table java \ -cp \ /jdbc-metadata-extractor.jar:/jars/* \ MainKt \ --class-name "$$DRIVER" \ --url "$$URL" \ --extra-prop "$$EXTRA_PROP" \ --runtime-prop "$$RUNTIME_PROPS" \ --output-prefix /output/mine \ --output-type JSON \ mine \ --input-parent-directory "/tmp/sqls" \ --sql-files-pattern "describe_tables-*.sql" \ --marker-file-path /tmp/marker/mine \ --group-result-per-sql "true" \ --ore-filename-prefix "tables" extract-service: &extract image: jdbc-metadata-extractor-master command: - "bash" - "-c" - *crawler-script privileged: false network_mode: host volumes: - &shared-jdbc-drivers jars:/jars hive-extract-service: &hive-extract image: jdbc-metadata-extractor-master command: - "bash" - "-c" - *hive-crawler-script privileged: false network_mode: host volumes: - *shared-jdbc-drivers psql: &psql DOWNLOAD_JDBC: "true" DOWNLOAD_JDBC_URL: "https://atlan-public.s3-eu-west-1.amazonaws.com/atlan/jdbc/postgres.tar.gz" DOWNLOAD_JDBC_DST: "/jars/postgres.tar.gz" DRIVER: "org.postgresql.Driver" URL_TEMPLATE: "jdbc:postgresql://$$HOST:$$PORT/$$DATABASE?ApplicationName=atlan&loginTimeout=5&connectionTimeout=5&tcpKeepAlive=true&readOnly=true&prepareThreshold=0" EXTRA_PROP_TEMPLATE: '{\"database\": \"$$DATABASE\"}' SYSTEM_SCHEMA_REGEX: '["pg_*", "information_*"]' TEMP_TABLE_REGEX: "[]" SYSTEM_DATABASE_REGEX: "[]" RUNTIME_PROPS: '{"user":"USERNAME","username":"USERNAME","password":"PASSWORD"}' FETCH_PRIMARY_KEYS: "true" FETCH_IMPORTED_KEYS: "true" EXCLUDE_TABLE_TYPES: "[]" TABLE_COMPANION_SQL: | SELECT current_database() as TABLE_CAT, COALESCE(T.table_schema, MV.schemaname) as TABLE_SCHEM, COALESCE(T.table_name, MV.matviewname) as TABLE_NAME, (CASE WHEN c.reltuples < 0 THEN NULL WHEN c.relpages = 0 THEN float8 '0' ELSE c.reltuples / c.relpages END * (pg_relation_size(c.oid) / pg_catalog.current_setting('block_size')::int) )::bigint as ROW_COUNT, C.relnatts as COLUMN_COUNT, C.relkind as TABLE_KIND, C.relispartition as IS_PARTITION, P.partstrat as PARTITION_STRATEGY, PC.parition_count as PARTITION_COUNT, PARTITION.parent_name as PARENT_TABLE_NAME, PARTITION.parent_table_kind as PARTITIONED_PARENT_TABLE, PARTITION_RANGE.PARTITION_CONSTRAINT as PARTITION_CONSTRAINT, P.partnatts as NUMBER_COLUMNS_IN_PART_KEY, P.partattrs as COLUMNS_PARTICIPATING_IN_PART_KEY, COALESCE(V.definition, MV.definition) as VIEW_DEFINITION, T.* FROM pg_class C LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace) LEFT JOIN pg_stat_user_tables PSUT ON (C.oid = PSUT.relid) LEFT JOIN information_schema.tables T ON (C.relname = T.table_name AND N.nspname = T.table_schema) LEFT JOIN pg_views V ON (T.table_name = V.viewname) LEFT JOIN pg_matviews MV ON (C.relname = MV.matviewname) LEFT JOIN pg_partitioned_table P on C.oid = P.partrelid LEFT JOIN (SELECT parent.relname AS table_name, COUNT(*) as parition_count FROM pg_inherits JOIN pg_class parent ON pg_inherits.inhparent = parent.oid JOIN pg_class child ON pg_inherits.inhrelid = child.oid JOIN pg_namespace nmsp_parent ON nmsp_parent.oid = parent.relnamespace JOIN pg_namespace nmsp_child ON nmsp_child.oid = child.relnamespace GROUP BY table_name) AS PC ON (C.relname = PC.table_name) LEFT JOIN (SELECT child.relname AS table_name, parent.relname AS parent_name, parent.relispartition AS parent_table_kind FROM pg_inherits JOIN pg_class parent ON pg_inherits.inhparent = parent.oid JOIN pg_class child ON pg_inherits.inhrelid = child.oid JOIN pg_namespace nmsp_parent ON nmsp_parent.oid = parent.relnamespace JOIN pg_namespace nmsp_child ON nmsp_child.oid = child.relnamespace WHERE parent.relkind = 'p') AS PARTITION ON (C.relname = PARTITION.table_name) LEFT JOIN (SELECT c.relname AS PARTITION_NAME, pg_get_expr(c.relpartbound, c.oid, true) AS PARTITION_CONSTRAINT from pg_class c where c.relispartition = 'true' and c.relkind = 'r') AS PARTITION_RANGE ON (C.relname = PARTITION_RANGE.PARTITION_NAME) WHERE N.nspname in (SELECT schema_name FROM INFORMATION_SCHEMA.SCHEMATA WHERE schema_name not like 'pg_%%' and schema_name != 'information_schema') AND C.relkind != 'i' AND C.relkind != 'I' COLUMN_COMPANION_SQL: | SELECT current_database() as TABLE_CAT, columns.TABLE_SCHEMA as TABLE_SCHEM, columns.ordinal_position AS ORDINAL_POSITION, C.relispartition as BELONGS_TO_PARTITION, CASE WHEN C.relkind = 'p' THEN true ELSE false END AS PARTITIONED_TABLE, columns.*, col_constraints.constraint_type, col_constraints.constraint_name FROM information_schema.columns AS columns LEFT OUTER JOIN ( SELECT tab_constraints.constraint_type, col_constraints.constraint_name, col_constraints.table_schema, col_constraints.table_name, col_constraints.column_name FROM information_schema.key_column_usage AS col_constraints INNER JOIN information_schema.table_constraints AS tab_constraints ON col_constraints.table_schema = tab_constraints.table_schema AND col_constraints.table_name = tab_constraints.table_name AND col_constraints.constraint_name = tab_constraints.constraint_name ) AS col_constraints ON columns.table_schema = col_constraints.table_schema AND columns.table_name = col_constraints.table_name AND columns.column_name = col_constraints.column_name INNER JOIN ( SELECT N.nspname, C.relname, C.relispartition, c.relkind FROM pg_class AS C INNER JOIN pg_namespace N ON N.oid = C.relnamespace ) AS C ON C.nspname = columns.table_schema AND C.relname = columns.table_name WHERE columns.table_schema NOT LIKE 'pg_%%' AND columns.table_schema != 'information_schema' EXTRA_COMPANION_SQL: | { "procedures": "SELECT Upper(current_database()) AS TABLE_CAT, N.nspname AS TABLE_SCHEM, Current_database() AS PROCEDURE_CAT, N.nspname AS PROCEDURE_SCHEM, P.proname AS PROCEDURE_NAME, B.usename AS PROC_OWNER, P.prosrc AS ROUTINE_DEFINITION FROM pg_catalog.pg_namespace N JOIN pg_catalog.pg_proc P ON pronamespace = N.oid JOIN pg_user B ON B.usesysid = P.proowner WHERE nspname NOT IN ('information_schema', 'pg_catalog') AND B.usename != 'rdsdb';" } DYNAMIC_QUERY_CONFIG: "{}" DEFAULT_DATABASE_NAME: DEFAULT_SCHEMA_NAME: HOST: PORT: 5432 DATABASE: USERNAME: PASSWORD: psql-iam: &psql-iam <<: *psql DRIVER: "io.magj.iamjdbcdriver.PostgreSqlIamAuthJdbcDriverWrapper" URL_TEMPLATE: "jdbc:iampostgresql://$$HOST:$$PORT/$$DATABASE?ApplicationName=atlan&loginTimeout=5&connectionTimeout=5&tcpKeepAlive=true&readOnly=true&delegateJdbcDriverSchemeName=postgresql&prepareThreshold=0" RUNTIME_PROPS: '{ "user": "USERNAME", "awsAccessKeyId": "AWS_ACCESS_KEY_ID", "awsSecretAccessKey": "AWS_SECRET_ACCESS_KEY" }' USERNAME: AWS_ACCESS_KEY_ID: AWS_SECRET_ACCESS_KEY: mysql: &mysql DOWNLOAD_JDBC: "true" DOWNLOAD_JDBC_URL: "https://atlan-public.s3-eu-west-1.amazonaws.com/atlan/jdbc/mysql.tar.gz" DOWNLOAD_JDBC_DST: "/jars/mysql.tar.gz" DRIVER: "com.mysql.cj.jdbc.Driver" URL_TEMPLATE: "jdbc:mysql://$$HOST:$$PORT" EXTRA_PROP_TEMPLATE: '{\"databaseTerm\": \"SCHEMA\"}' SYSTEM_SCHEMA_REGEX: '["^performance_schema$$", "^information_schema*", "^mysql$$", "^sys$$"]' TEMP_TABLE_REGEX: "[]" EXCLUDE_TABLE_TYPES: "[]" SYSTEM_DATABASE_REGEX: "[]" RUNTIME_PROPS: '{"user":"USERNAME","username":"USERNAME","password":"PASSWORD"}' FETCH_PRIMARY_KEYS: "false" FETCH_IMPORTED_KEYS: "false" TABLE_COMPANION_SQL: | select 'def' as TABLE_CAT, 'def' as TABLE_CATALOG, T.table_schema as TABLE_SCHEM, T.table_name as TABLE_NAME, T.TABLE_ROWS as ROW_COUNT, (T.INDEX_LENGTH + T.DATA_LENGTH) as BYTES, IF(T.TABLE_TYPE = 'VIEW', 'v', 't') as TABLE_KIND, IF(p.PARTITION_METHOD is NULL, FALSE, TRUE) as HAS_PARTITIONS, IF(p.PARTITION_METHOD is NULL, FALSE, TRUE) as IS_PARTITION, p.PARTITION_METHOD as PARTITION_STRATEGY, IF(p.PARTITION_METHOD is NULL, 0, p.parition_count) as PARTITION_COUNT, p.PARTITIONS, T.table_name as PARENT_TABLE_NAME, v.VIEW_DEFINITION as VIEW_DEFINITION, T.* from information_schema.tables T LEFT JOIN ( select TABLE_NAME, TABLE_SCHEMA, PARTITION_METHOD, count(*) as parition_count, GROUP_CONCAT(PARTITION_NAME order by PARTITION_NAME) as PARTITIONS from information_schema.PARTITIONS group by TABLE_NAME, TABLE_SCHEMA, PARTITION_METHOD ) as p on p.TABLE_NAME = T.TABLE_NAME and p.TABLE_SCHEMA = T.TABLE_SCHEMA LEFT join information_schema.VIEWS as v on v.TABLE_NAME = T.TABLE_NAME and v.TABLE_SCHEMA = T.TABLE_SCHEMA where T.TABLE_SCHEMA not in ('performance_schema', 'information_schema', 'mysql', 'sys'); COLUMN_COMPANION_SQL: | select 'def' as TABLE_CAT, 'def' as TABLE_CATALOG, columns.TABLE_SCHEMA as TABLE_SCHEM, columns.EXTRA as EXTRA_INFO, columns.*, IF(tab_constraints.constraint_type = 'PRIMARY KEY', true, false) as PRIMARY_KEY, tab_constraints.constraint_type, col_constraints.constraint_name from information_schema.columns as columns inner join information_schema.key_column_usage as col_constraints on columns.table_name = col_constraints.table_name and columns.table_schema = col_constraints.TABLE_SCHEMA and columns.column_name = col_constraints.column_name inner join information_schema.table_constraints as tab_constraints on tab_constraints.constraint_name = col_constraints.constraint_name and tab_constraints.CONSTRAINT_CATALOG = col_constraints.CONSTRAINT_CATALOG and tab_constraints.table_schema = col_constraints.table_schema and tab_constraints.TABLE_NAME = col_constraints.TABLE_NAME where columns.table_schema not in ('performance_schema', 'information_schema', 'mysql', 'sys'); EXTRA_COMPANION_SQL: | { "procedures": "SELECT 'def' as PROCEDURE_CAT, r.routine_schema as TABLE_SCHEM, r.routine_schema as PROCEDURE_SCHEM, r.definer as PROC_OWNER, r.routine_comment as REMARKS, r.routine_name as PROCEDURE_NAME, r.routine_type as PROCEDURE_TYPE, r.created, r.last_altered, r.ROUTINE_DEFINITION from INFORMATION_SCHEMA.ROUTINES r where r.routine_schema not in ('performance_schema', 'information_schema', 'mysql', 'sys');" } DATABASE_COMPANION_SQL: | select count(*) as SCHEMA_COUNT, 'def' as TABLE_CAT from information_schema.SCHEMATA where SCHEMA_NAME not in ('performance_schema', 'information_schema', 'mysql', 'sys'); DYNAMIC_QUERY_CONFIG: "{}" DEFAULT_DATABASE_NAME: default DEFAULT_SCHEMA_NAME: HOST: PORT: 3306 USERNAME: PASSWORD: mysql-iam: &mysql-iam <<: *mysql DRIVER: "io.magj.iamjdbcdriver.MySqlIamAuthJdbcDriverWrapper" URL_TEMPLATE: "jdbc:iammysql://$$HOST:$$PORT?delegateJdbcDriverSchemeName=mysql" RUNTIME_PROPS: '{ "user": "USERNAME", "awsAccessKeyId": "AWS_ACCESS_KEY_ID", "awsSecretAccessKey": "AWS_SECRET_ACCESS_KEY" }' USERNAME: AWS_ACCESS_KEY_ID: AWS_SECRET_ACCESS_KEY: oracledb: &oracledb DOWNLOAD_JDBC: "true" DOWNLOAD_JDBC_URL: "https://atlan-public.s3-eu-west-1.amazonaws.com/atlan/jdbc/oracle.tar.gz" DOWNLOAD_JDBC_DST: "/jars/oracle.tar.gz" DRIVER: "oracle.jdbc.driver.OracleDriver" URL_TEMPLATE: "jdbc:oracle:thin:@//$$HOST:$$PORT/$$SERVICE" EXTRA_PROP_TEMPLATE: '{\"service\": \"$$SERVICE\"}' SYSTEM_SCHEMA_REGEX: '["^ANONYMOUS$$", "^APPQOSSYS$$", "^AUDSYS$$", "^CTXSYS$$", "^DBSFWUSER$$", "^DBSNMP$$", "^DIP$$", "^DVF$$", "^DVSYS$$", "^GGSYS$$", "^GSMADMIN_INTERNAL$$", "^GSMCATUSER$$", "^GSMROOTUSER$$", "^GSMUSER$$", "^LBACSYS$$", "^MDDATA$$", "^MDSYS$$", "^OJVMSYS$$", "^OLAPSYS$$", "^OPS\\$$ORACLE$$", "^ORACLE_OCM$$", "^ORDDATA$$", "^ORDPLUGINS$$", "^ORDSYS$$", "^OUTLN$$", "^REMOTE_SCHEDULER_AGENT$$", "^SI_INFORMTN_SCHEMA$$", "^SYS$$", "^SYS\\$$UMF$$", "^SYSBACKUP$$", "^SYSDG$$", "^SYSKM$$", "^SYSRAC$$", "^SYSTEM$$", "^WMSYS$$", "^XDB$$", "^XS\\$$NULL$$", "^RDSADMIN$$", "^APEX_040200$$", "^APEX_PUBLIC_USER$$", "^DEMO$$", "^FLOWS_FILES$$", "^FRANCK$$", "^HR$$", "^IX$$", "^OE$$", "^PM$$", "^RMAN$$", "^SCOTT$$", "^SH$$", "^SPATIAL_CSW_ADMIN_USR$$", "^SPATIAL_WFS_ADMIN_USR$$", "^SQLTXADMIN$$", "^SQLTXPLAIN$$"]' TEMP_TABLE_REGEX: "[]" EXCLUDE_TABLE_TYPES: "[]" SYSTEM_DATABASE_REGEX: "[]" RUNTIME_PROPS: '{"user":"USERNAME","password":"PASSWORD", "useFetchSizeWithLongColumn": true, "remarksReporting": true}' FETCH_PRIMARY_KEYS: "true" FETCH_IMPORTED_KEYS: "true" TABLE_COMPANION_SQL: | SELECT sys_context('USERENV', 'DB_NAME') AS TABLE_CAT, AC.OWNER AS TABLE_SCHEM, AC.TABLE_NAME AS TABLE_NAME, AC.OWNER AS TABLE_OWNER, (T.AVG_ROW_LEN * T.NUM_ROWS) AS TABLE_SIZE, AC.TABLE_TYPE, T.NUM_ROWS AS ROW_COUNT, (CASE WHEN TP.PARTITIONING_TYPE IS NULL THEN 0 ELSE 1 END) AS HAS_PARTITIONS, TP.PARTITIONING_TYPE AS PARTITION_STRATEGY, TP.PARTITION_COUNT AS PARTITION_COUNT, TP.PARTITIONING_KEY_COUNT AS NUMBER_COLUMNS_IN_PART_KEY, TP.TABLE_NAME AS PARENT_TABLE_NAME, T.CLUSTERING AS AUTO_CLUSTERING_ON, (CASE WHEN AV.TEXT IS NOT NULL THEN AV.TEXT ELSE AMV.QUERY END) AS VIEW_DEFINITION, (CASE WHEN AMV.MVIEW_NAME IS NOT NULL THEN 1 END) AS IS_MATERIALISED_VIEW, ATP.PARTITIONS, ASY.SYNONYM_NAME AS ALIAS, MVC.COMMENTS AS MV_REMARKS, T.* FROM ALL_CATALOG AC LEFT JOIN ALL_TABLES T ON T.OWNER = AC.OWNER AND T.TABLE_NAME = AC.TABLE_NAME LEFT JOIN ALL_PART_TABLES TP ON TP.TABLE_NAME = AC.TABLE_NAME AND TP.OWNER = AC.OWNER LEFT JOIN ALL_MVIEWS AMV ON T.OWNER = AMV.OWNER AND T.TABLE_NAME = AMV.MVIEW_NAME LEFT JOIN ALL_MVIEW_COMMENTS MVC ON MVC.OWNER = T.OWNER AND T.TABLE_NAME = MVC.MVIEW_NAME LEFT JOIN (SELECT TABLE_NAME, LISTAGG(PARTITION_NAME, ',') WITHIN GROUP ( ORDER BY PARTITION_NAME) PARTITIONS FROM ALL_TAB_PARTITIONS GROUP BY TABLE_NAME) ATP ON ATP.TABLE_NAME = AC.TABLE_NAME LEFT JOIN ALL_VIEWS AV ON AV.VIEW_NAME = AC.TABLE_NAME AND AV.OWNER = AC.OWNER LEFT JOIN ALL_SYNONYMS ASY ON ASY.TABLE_OWNER = AC.OWNER AND ASY.TABLE_NAME = AC.TABLE_NAME WHERE AC.OWNER NOT IN ('PUBLIC', 'ANONYMOUS', 'APPQOSSYS', 'AUDSYS', 'CTXSYS', 'DBSFWUSER', 'DBSNMP', 'DIP', 'DVF', 'DVSYS', 'GGSYS', 'GSMADMIN_INTERNAL', 'GSMCATUSER', 'GSMROOTUSER', 'GSMUSER', 'LBACSYS', 'MDDATA', 'MDSYS', 'OJVMSYS', 'OLAPSYS', 'OPS$ORACLE', 'ORACLE_OCM', 'ORDDATA', 'ORDPLUGINS', 'ORDSYS', 'OUTLN', 'REMOTE_SCHEDULER_AGENT', 'SI_INFORMTN_SCHEMA', 'SYS', 'SYS$UMF', 'SYSBACKUP', 'SYSDG', 'SYSKM', 'SYSRAC', 'SYSTEM', 'WMSYS', 'XDB', 'XS$NULL', 'RDSADMIN') COLUMN_COMPANION_SQL: | SELECT sys_context('USERENV', 'DB_NAME') AS TABLE_CAT, columns.OWNER AS TABLE_SCHEM, columns.NULLABLE AS IS_NULLABLE, columns.DATA_DEFAULT AS COLUMN_DEFAULT, columns.DATA_SCALE AS NUMERIC_SCALE, columns.char_length AS CHARACTER_MAXIMUM_LENGTH, columns.DATA_PRECISION AS DECIMAL_DIGITS, ac.constraint_names, ac.CONSTRAINT_TYPES, columns.COLLATION AS COLLATION_NAME, columns.USER_GENERATED as IS_GENERATED, auc.UPDATABLE AS IS_UPDATABLE, AO.OBJECT_TYPE AS PARENT_TYPE, (CASE WHEN AMV.MVIEW_NAME IS NOT NULL THEN 1 END) AS IS_MATERIALISED_VIEW, columns.* FROM ALL_TAB_COLS columns LEFT JOIN All_tables T ON T.OWNER = COLUMNS.OWNER AND T.TABLE_NAME = columns.TABLE_NAME LEFT JOIN ( SELECT acc.OWNER, acc.TABLE_NAME, acc.COLUMN_NAME, LISTAGG(acc.constraint_name, ',') AS CONSTRAINT_NAMES, LISTAGG( (CASE WHEN aci.constraint_type = 'P' THEN 'PRIMARY KEY' WHEN aci.constraint_type = 'C' THEN 'CHECK CONSTRAINT ON A TABLE' WHEN aci.constraint_type = 'U' THEN 'UNIQUE KEY' WHEN aci.constraint_type = 'R' THEN 'REFERENTIAL INTEGRITY' WHEN aci.constraint_type = 'V' THEN 'WITH CHECK OPTION, ON A VIEW' ELSE 'WITH READ ONLY, ON A VIEW' END), '|') AS CONSTRAINT_TYPES FROM ALL_CONS_COLUMNS acc INNER JOIN ALL_CONSTRAINTS aci ON (acc.CONSTRAINT_NAME = aci.CONSTRAINT_NAME AND acc.OWNER = aci.OWNER AND acc.TABLE_NAME = aci.TABLE_NAME) GROUP BY acc.OWNER, acc.TABLE_NAME, acc.COLUMN_NAME ) ac ON ac.OWNER = columns.Owner AND ac.table_name = columns.table_name AND ac.COLUMN_NAME = columns.COLUMN_NAME LEFT JOIN (SELECT * FROM ALL_OBJECTS WHERE OBJECT_TYPE != 'TABLE PARTITION') AO ON AO.OWNER = COLUMNS.OWNER AND AO.OBJECT_NAME = COLUMNS.TABLE_NAME LEFT JOIN ALL_UPDATABLE_COLUMNS auc ON auc.table_name = columns.table_name AND auc.owner = columns.OWNER AND auc.column_name = columns.COLUMN_NAME LEFT JOIN ALL_MVIEWS AMV ON columns.OWNER = AMV.OWNER AND columns.TABLE_NAME = AMV.MVIEW_NAME WHERE columns.Owner NOT IN ('RDSADMIN', 'ANONYMOUS', 'APPQOSSYS', 'AUDSYS', 'CTXSYS', 'DBSFWUSER', 'DBSNMP', 'DIP', 'DVF', 'DVSYS', 'GGSYS', 'GSMADMIN_INTERNAL', 'GSMCATUSER', 'GSMROOTUSER', 'GSMUSER', 'LBACSYS', 'MDDATA', 'MDSYS', 'OJVMSYS', 'OLAPSYS', 'OPS$ORACLE', 'ORACLE_OCM', 'ORDDATA', 'ORDPLUGINS', 'ORDSYS', 'OUTLN', 'REMOTE_SCHEDULER_AGENT', 'SI_INFORMTN_SCHEMA', 'SYS', 'SYS$UMF', 'SYSBACKUP', 'SYSDG', 'SYSKM', 'SYSRAC', 'SYSTEM', 'WMSYS', 'XDB', 'XS$NULL') EXTRA_COMPANION_SQL: "{}" DYNAMIC_QUERY_CONFIG: "{}" DEFAULT_DATABASE_NAME: DEFAULT_SCHEMA_NAME: HOST: PORT: 1521 SERVICE: USERNAME: PASSWORD: CREDENTIAL_SECRET_PATH: mssql: &mssql DOWNLOAD_JDBC: "true" DOWNLOAD_JDBC_URL: "https://atlan-public.s3-eu-west-1.amazonaws.com/atlan/jdbc/mssql.tar.gz" DOWNLOAD_JDBC_DST: "/jars/mssql.tar.gz" DRIVER: "com.microsoft.sqlserver.jdbc.SQLServerDriver" URL_TEMPLATE: "jdbc:sqlserver://$$HOST:$$PORT;database=$$DATABASE;loginTimeout=10;queryTimeout=60;applicationName=atlan;applicationIntent=ReadOnly" EXTRA_PROP_TEMPLATE: '' SYSTEM_SCHEMA_REGEX: '["^sys$$", "^information_schema$$", "^guest$$","^db_accessadmin$$","^db_backupoperator$$","^db_datareader$$","^db_datawriter$$","^db_ddladmin$$","^db_denydatareader$$","^db_denydatawriter$$","^db_owner$$","^db_securityadmin$$"]' TEMP_TABLE_REGEX: "[]" EXCLUDE_TABLE_TYPES: "[]" SYSTEM_DATABASE_REGEX: '["model", "msdb", "tempdb", "master"]' RUNTIME_PROPS: '{"user":"USERNAME","username":"USERNAME","password":"PASSWORD"}' FETCH_PRIMARY_KEYS: "true" FETCH_IMPORTED_KEYS: "true" TABLE_COMPANION_SQL: | select t.TABLE_CATALOG as TABLE_CAT, t.TABLE_SCHEMA as TABLE_SCHEM, t.TABLE_NAME as TABLE_NAME, t.TABLE_TYPE as TABLE_TYPE, CASE when p.PARTITIONS is NULL then 0 else 1 End as HAS_PARTITIONS, CASE when p.partition_count is NULL then 0 else p.partition_count End as PARTITION_COUNT, p.PARTITIONS, c.COLUMN_COUNT, OBJECT_DEFINITION(OBJECT_ID(concat(t.TABLE_CATALOG,'.',t.TABLE_SCHEMA,'.',v.TABLE_NAME))) as VIEW_DEFINITION from INFORMATION_SCHEMA.TABLES t LEFT JOIN ( select * from INFORMATION_SCHEMA.VIEWS ) as v on v.TABLE_NAME = t.TABLE_NAME and v.TABLE_SCHEMA = t.TABLE_SCHEMA LEFT JOIN ( select object_id, count(*) as partition_count, stuff((select ',' + CAST(partition_id AS VARCHAR(MAX)) from sys.partitions where object_id = p.object_id FOR XML PATH('') ), 1, 1, '') as PARTITIONS from sys.partitions p group by object_id ) as p on p.object_id = OBJECT_ID(T.TABLE_NAME) LEFT JOIN ( select count(*) as COLUMN_COUNT, TABLE_NAME, TABLE_SCHEMA from INFORMATION_SCHEMA.COLUMNS group by TABLE_NAME, TABLE_SCHEMA ) as c on c.TABLE_NAME = t.TABLE_NAME and c.TABLE_SCHEMA = t.TABLE_SCHEMA where t.TABLE_SCHEMA not in ('INFORMATION_SCHEMA', 'sys'); COLUMN_COMPANION_SQL: | select columns.TABLE_CATALOG as TABLE_CAT, columns.TABLE_SCHEMA as TABLE_SCHEM, col_constraints.CONSTRAINT_NAME, tab_constraints.constraint_type, columns.* from INFORMATION_SCHEMA.COLUMNS columns left join INFORMATION_SCHEMA.KEY_COLUMN_USAGE as col_constraints on columns.table_name = col_constraints.table_name and columns.table_schema = col_constraints.TABLE_SCHEMA and columns.column_name = col_constraints.column_name left join INFORMATION_SCHEMA.TABLE_CONSTRAINTS as tab_constraints on tab_constraints.constraint_name = col_constraints.constraint_name and tab_constraints.CONSTRAINT_CATALOG = col_constraints.CONSTRAINT_CATALOG and tab_constraints.table_schema = col_constraints.table_schema and tab_constraints.TABLE_NAME = col_constraints.TABLE_NAME; EXTRA_COMPANION_SQL: '{"procedures": "SELECT upper(r.SPECIFIC_CATALOG) as PROCEDURE_CAT, r.SPECIFIC_SCHEMA as PROCEDURE_SCHEM, r.routine_name as PROCEDURE_NAME, r.* from INFORMATION_SCHEMA.ROUTINES r;"}' DYNAMIC_QUERY_CONFIG: "{}" DATABASE: DEFAULT_SCHEMA_NAME: HOST: PORT: 1433 USERNAME: PASSWORD: hive: &hive DOWNLOAD_JDBC: "true" DOWNLOAD_JDBC_URL: "https://atlan-public.s3-eu-west-1.amazonaws.com/atlan/jdbc/hive.tar.gz" DOWNLOAD_JDBC_DST: "/jars/hive.tar.gz" DRIVER: "org.apache.hive.jdbc.HiveDriver" URL_TEMPLATE: "jdbc:hive2://$$HOST:$$PORT/$$DEFAULT_SCHEMA" RUNTIME_PROPS: '{"user":"USERNAME","password":"PASSWORD"}' SCHEMA_EXCLUDE_REGEX: "$$^" SCHEMA_INCLUDE_REGEX: "^.*$$" EXCLUDE_TABLE_TYPES: "[]" TEMP_TABLE_REGEX: "$$^" HOST: PORT: 10000 DEFAULT_SCHEMA: default USERNAME: PASSWORD: teradatadb: &teradatadb DOWNLOAD_JDBC: "true" DOWNLOAD_JDBC_URL: "https://atlan-public.s3.eu-west-1.amazonaws.com/atlan/jdbc/teradata_atlan.tar.gz" DOWNLOAD_JDBC_DST: "/jars/teradata_atlan.tar.gz" DRIVER: "com.teradata.jdbc.TeraDriver" URL_TEMPLATE: "jdbc:teradata://$$HOST" SYSTEM_SCHEMA_REGEX: '["^All$","^Crashdumps$","^DBC$","^dbcmngr$","^Default$","^External_AP$","^EXTUSER$","^LockLogShredder$","^PUBLIC$","^Sys_Calendar$","^SysAdmin$","^SYSBAR$","^SYSJDBC$","^SYSLIB$","^SystemFe$","^SYSUDTLIB$","^SYSUIF$","^TD_SERVER_DB$","^TDStats$","^TD_SYSGPL$","^TD_SYSXML$","^TDMaps$","^TDPUSER$","^TDQCD$","^tdwm$","^SQLJ$","^TD_SYSFNLIB$","^SYSSPATIAL$"]' TEMP_TABLE_REGEX: "[]" EXCLUDE_TABLE_TYPES: "[]" SYSTEM_DATABASE_REGEX: "[]" RUNTIME_PROPS: '{"user":"USERNAME","password":"PASSWORD"}' FETCH_PRIMARY_KEYS: "true" FETCH_IMPORTED_KEYS: "true" TABLE_COMPANION_SQL: | WITH row_count AS ( SELECT DatabaseName, TableName, RowCount FROM DBC.TableStatsV), column_counts AS ( SELECT tablename, COUNT(*) AS ColumnCount FROM dbc.columns GROUP BY tablename) SELECT tb.DataBaseName AS TABLE_SCHEM, tb.TableName AS TABLE_NAME, tb.CommentString AS REMARKS, tb.CreatorName AS CREATOR_NAME, tb.CreateTimeStamp AS CREATE_TIMESTAMP, tb.LastAlterTimeStamp AS LAST_ALTER_TIMESTAMP, TRIM(tb.LastAlterName) AS LAST_ALTER_NAME, tb.RequestTxtOverFlow, ts.CurrentPerm AS byte_size, row_count.RowCount, co.ColumnCount, CASE WHEN tb.RequestTxtOverFlow IS NULL THEN tb.RequestText ELSE tt.RequestText END AS ROUTINE_DEFINITION, CASE WHEN tb.TableKind IN ('T', 'O') THEN 'TABLE' ELSE 'VIEW' END AS TABLE_TYPE FROM dbc.TablesV tb LEFT JOIN column_counts AS co ON tb.TableName = co.tablename LEFT JOIN row_count ON tb.TableName = row_count.tablename LEFT OUTER JOIN (SELECT * FROM (SELECT DataBaseName, TableName, 2 * LineNo - 1 AS LineNo, substr(RequestText, 1, 16000) AS RequestText FROM dbc.TableTextV text_split_1 WHERE LOWER(text_split_1.DataBaseName) NOT IN ('''') AND LOWER(text_split_1.DataBaseName) NOT IN ('sysspatial', 'td_sysfnlib','syslib','tdwstage','twm_source','default', 'tdpuser','public','locklogshredder','sys_calendar','tpch', 'extuser','sysudtlib','td_sysxml','sysadmin','twm_md','all', 'dbcmngr','viewpoint','console','td_sysgpl','information_schema', 'dbc','sysjdbc','systemfe','external_ap','sqlj','tdstats','tdwm', 'tdqcd','crashdumps','sysuif','sysbar','td_server_db') AND text_split_1.TableKind IN ('O', 'T', 'V') UNION ALL SELECT DataBaseName, TableName, 2 * LineNo AS LineNo, substr(RequestText, 16001) AS RequestText FROM dbc.TableTextV text_split_2 WHERE LOWER(text_split_2.DataBaseName) NOT IN ('''') AND LOWER(text_split_2.DataBaseName) NOT IN ('sysspatial', 'td_sysfnlib','syslib','tdwstage','twm_source','default', 'tdpuser','public','locklogshredder','sys_calendar','tpch', 'extuser','sysudtlib','td_sysxml','sysadmin','twm_md','all', 'dbcmngr','viewpoint','console','td_sysgpl','information_schema', 'dbc','sysjdbc','systemfe','external_ap','sqlj','tdstats','tdwm', 'tdqcd','crashdumps','sysuif','sysbar','td_server_db') AND text_split_2.TableKind IN ('O', 'T', 'V') AND CHARACTER_LENGTH(RequestText) > 16000) tabletextv_split) tt ON tb.DataBaseName = tt.DataBaseName AND tb.TableName = tt.TableName LEFT OUTER JOIN (SELECT DataBaseName, TableName, sum(CurrentPerm) AS CurrentPerm, sum(PeakPerm) AS PeakPerm FROM dbc.TableSizeV GROUP BY DataBaseName, TableName) ts ON tb.DataBaseName = ts.DataBaseName AND tb.TableName = ts.TableName WHERE LOWER(tb.DataBaseName) NOT IN ('sysspatial','td_sysfnlib', 'syslib','tdwstage','twm_source','default','tdpuser','public', 'locklogshredder','sys_calendar','tpch','extuser','sysudtlib', 'td_sysxml','sysadmin','twm_md','all','dbcmngr','viewpoint', 'console','td_sysgpl','information_schema','dbc','sysjdbc', 'systemfe','external_ap','sqlj','tdstats','tdwm','tdqcd', 'crashdumps','sysuif','sysbar','td_server_db') AND tb.TableKind IN ('O', 'T', 'V') ORDER BY TABLE_TYPE, tb.DataBaseName, tb.TableName, tt.LineNo; COLUMN_COMPANION_SQL: | SELECT col.DatabaseName AS TABLE_SCHEM, CASE WHEN tab.TableKind IN ('T', 'O') THEN 'TABLE' ELSE 'VIEW' END AS TABLE_TYPE, col.TableName AS TABLE_NAME, col.ColumnName AS COLUMN_NAME, col.ColumnType AS DATA_TYPE, col.ColumnId AS COLUMN_ID, CAST ((row_number () OVER (PARTITION BY col.databasename, col.tablename order by col.ColumnId)) AS integer) AS ORDINAL_POSITION, CAST ( (CASE WHEN col.ColumnType IS NULL THEN '' ELSE (CASE col.ColumnType WHEN '++' THEN 'TD_ANYTYPE' WHEN 'AT' THEN 'TIME' WHEN 'BF' THEN 'BYTE' WHEN 'BO' THEN 'BLOB' WHEN 'BV' THEN 'VARBYTE' WHEN 'CF' THEN 'CHAR' WHEN 'CO' THEN 'CLOB' WHEN 'CV' THEN 'VARCHAR' WHEN 'D' THEN 'DECIMAL' WHEN 'DA' THEN 'DATE' WHEN 'DH' THEN 'INTERVAL DAY TO HOUR' WHEN 'DM' THEN 'INTERVAL DAY TO MINUTE' WHEN 'DS' THEN 'INTERVAL DAY TO SECOND' WHEN 'DY' THEN 'INTERVAL DAY' WHEN 'F' THEN 'FLOAT' WHEN 'GF' THEN 'GRAPHIC' WHEN 'GV' THEN 'VARGRAPHIC' WHEN 'HM' THEN 'INTERVAL HOUR TO MINUTE' WHEN 'HR' THEN 'INTERVAL HOUR' WHEN 'HS' THEN 'INTERVAL HOUR TO SECOND' WHEN 'I1' THEN 'BYTEINT' WHEN 'I2' THEN 'SMALLINT' WHEN 'I' THEN 'INTEGER' WHEN 'I8' THEN 'BIGINT' WHEN 'MI' THEN 'INTERVAL MINUTE' WHEN 'MO' THEN 'INTERVAL MONTH' WHEN 'MS' THEN 'INTERVAL MINUTE TO SECOND' WHEN 'N' THEN 'NUMBER' WHEN 'PD' THEN 'PERIOD(DATE)' WHEN 'PM' THEN 'PERIOD(TIMESTAMP WITH TIME ZONE)' WHEN 'PS' THEN 'PERIOD(TIMESTAMP)' WHEN 'PT' THEN 'PERIOD(TIME)' WHEN 'PZ' THEN 'PERIOD(TIME WITH TIME ZONE)' WHEN 'SC' THEN 'INTERVAL SECOND' WHEN 'SZ' THEN 'TIMESTAMP WITH TIME ZONE' WHEN 'TS' THEN 'TIMESTAMP' WHEN 'TZ' THEN 'TIME WITH TIME ZONE' WHEN 'XM' THEN 'XML' WHEN 'YM' THEN 'INTERVAL YEAR TO MONTH' WHEN 'YR' THEN 'INTERVAL YEAR' ELSE TRIM (col.ColumnType) END) END) AS VARCHAR(500)) AS TYPE_NAME, col.ColumnLength AS COLUMN_SIZE, col.PartitioningColumn AS PARTITIONING_COLUMN, col.DecimalTotalDigits, col.DecimalFractionalDigits, case col.Nullable when 'Y' then 'Yes' else 'No' end as Nullable, col.DefaultValue, col.ColumnConstraint, CASE WHEN pks.TableName IS NOT NULL THEN 'Yes' ELSE 'No' END AS PrimaryIndex, CASE WHEN pks.TableName IS NOT NULL THEN 'Yes' ELSE 'No' END AS PRIMARY_KEY, CASE WHEN fks.ChildDB IS NOT NULL THEN 'Yes' ELSE 'No' END AS FOREIGN_KEY, CASE WHEN uqs.UniqueFlag IS NOT NULL THEN 'Yes' ELSE 'No' END AS UniqueKey FROM DBC.ColumnsV col JOIN DBC.TablesV tab ON col.DataBaseName = tab.DataBaseName AND col.TableName = tab.TableName LEFT JOIN DBC.IndicesV pks ON tab.DatabaseName = pks.DatabaseName AND tab.TableName = pks.TableName AND tab.PrimaryKeyIndexId = pks.IndexNumber AND col.ColumnName = pks.ColumnName LEFT JOIN DBC.IndicesV uqs ON col.DatabaseName = uqs.DatabaseName AND col.TableName = uqs.TableName AND col.ColumnName = uqs.ColumnName AND uqs.IndexType = 'U' LEFT JOIN DBC.IndicesV pis ON col.DatabaseName = pis.DatabaseName AND col.TableName = pis.TableName AND col.ColumnName = pis.ColumnName AND uqs.IndexNumber = 1 LEFT JOIN DBC.All_RI_ChildrenV fks ON fks.ChildDB = col.DatabaseName AND fks.ChildTable = col.TableName AND fks.ChildKeyColumn = col.ColumnName WHERE LOWER(TRIM(col.DataBaseName)) NOT IN ('sysspatial', 'td_sysfnlib','syslib','tdwstage','twm_source','default', 'tdpuser','public','locklogshredder','sys_calendar','tpch', 'extuser','sysudtlib','td_sysxml','sysadmin','twm_md','all', 'dbcmngr','viewpoint','console','td_sysgpl','information_schema', 'dbc','sysjdbc','systemfe','external_ap','sqlj','tdstats','tdwm', 'tdqcd','crashdumps','sysuif','sysbar','td_server_db') ORDER BY col.DatabaseName, col.TableName, col.ColumnId; SCHEMA_COMPANION_SQL: | SELECT TRIM(d.DatabaseName) as TABLE_SCHEM, COUNT(CASE WHEN t.TableKind = 'T' THEN 1 END) AS TABLE_COUNT, COUNT(CASE WHEN t.TableKind = 'V' THEN 1 END) AS VIEW_COUNT, TRIM(d.CreatorName) AS CREATOR_NAME, TRIM(d.OwnerName) AS OWNER_NAME, TRIM(d.AccountName) AS ACCOUNT_NAME, d.CommentString AS REMARKS, d.CreateTimeStamp AS CREATE_TIMESTAMP, TRIM(d.LastAlterName) AS LAST_ALTER_NAME, d.LastAlterTimeStamp AS LAST_ALTER_TIMESTAMP FROM dbc.databases as d LEFT JOIN dbc.tables as t ON d.DatabaseName = t.DatabaseName GROUP BY d.DatabaseName, d.CreatorName, d.OwnerName, d.AccountName, d.CommentString, d.CreateTimeStamp, d.LastAlterName, d.LastAlterTimeStamp; DYNAMIC_QUERY_CONFIG: | { "column": [ { "condition": "{{ (row.TABLE_TYPE == 'VIEW' or row.TABLE_TYPE == 'MATERIALIZED VIEW' ) }}", "column": "DETAILS", "sql": "HELP Column {{row.TABLE_SCHEM}}.{{row.TABLE_NAME}}.{{row.COLUMN_NAME}}" } ] } EXTRA_COMPANION_SQL: "{}" DEFAULT_DATABASE_NAME: DEFAULT DEFAULT_SCHEMA_NAME: HOST: PORT: 1025 USERNAME: PASSWORD: teradatadb-ldap: &teradatadb-ldap <<: *teradatadb URL_TEMPLATE: "jdbc:teradata://$$HOST/database=$$DATABASE,USER=$$USERNAME,PASSWORD=$$PASSWORD,tmode=ANSI,charset=UTF8,LOGMECH=LDAP,RECONNECT_COUNT=20" DATABASE: saphana: &saphana DOWNLOAD_JDBC: "true" DOWNLOAD_JDBC_URL: "https://atlan-public.s3-eu-west-1.amazonaws.com/atlan/jdbc/sap-hana.tar.gz" DOWNLOAD_JDBC_DST: "/jars/sap-hana.tar.gz" DRIVER: "com.sap.db.jdbc.Driver" URL_TEMPLATE: "jdbc:sap://$$HOST:$$PORT" EXTRA_PROP_TEMPLATE: '{\"databaseTerm\": \"SCHEMA\"}' SYSTEM_SCHEMA_REGEX: "['^SYS_.*', '^_SYS_.*', '^SAP_.*', '^_SAP_.*','^PAL_.*', '^FDT_.*', '^XSSQLCC_AUTO_USER_.*', '^SYS$', 'BROKER_USER', 'BROKER_PO_USER', 'HANA_XS_BASE', 'UIS', 'PUBLIC', 'SAPHANADB', 'DBACOCKPIT', 'SAPDBCTRL']" EXCLUDE_TABLE_TYPES: "[\"CALC VIEW\", \"GLOBAL TEMPORARY\", \"HIERARCHY VIEW\", \"JOIN VIEW\", \"OLAP VIEW\", \"SYNONYM\", \"USER DEFINED\"]" TEMP_TABLE_REGEX: "[]" SYSTEM_DATABASE_REGEX: "[]" RUNTIME_PROPS: '{"user":"USERNAME","username":"USERNAME","password":"PASSWORD"}' FETCH_PRIMARY_KEYS: "false" FETCH_IMPORTED_KEYS: "false" TABLE_COMPANION_SQL: | ( SELECT T.COMMENTS AS REMARKS, T.CREATE_TIME, NULL AS VIEW_DEFINITION, 'DEFAULT' AS TABLE_CAT, T.SCHEMA_NAME AS TABLE_SCHEM, T.TABLE_NAME, T.TABLE_OID, T.TABLE_TYPE, MT.IS_PARTITIONED AS HAS_PARTITIONS, MT.RECORD_COUNT AS ROW_COUNT, MT.TABLE_SIZE as BYTES, T.AUTO_MERGE_ON, T.AUTO_OPTIMIZE_COMPRESSION_ON, T.COMMIT_ACTION, T.COMPRESSED_EXTKEY, T.FIXED_PART_SIZE, T.HAS_MASKED_COLUMNS, T.HAS_PRIMARY_KEY, T.HAS_RECORD_COMMIT_TIMESTAMP, T.HAS_STRUCTURED_PRIVILEGE_CHECK, T.HAS_TEXT_FIELDS, T.IS_COLUMN_TABLE, T.IS_INSERT_ONLY, T.IS_LOGGED, T.IS_PARTIAL_PRELOAD, T.IS_PRELOAD, T.IS_PUBLIC, T.IS_REPLICA, T.IS_REPLICATION_LOG_ENABLED, T.IS_SYSTEM_TABLE, T.IS_TEMPORARY, T.IS_USER_DEFINED_TYPE, T.PERSISTENT_MEMORY, T.ROW_ORDER_TYPE, T.TEMPORAL_TYPE, T.TEMPORARY_TABLE_TYPE, T.UNLOAD_PRIORITY, T.USES_DIMFN_CACHE, T.USES_EXTKEY, T.USES_QUEUE_TABLE FROM SYS.TABLES T LEFT JOIN SYS.M_TABLES MT ON MT.SCHEMA_NAME = T.SCHEMA_NAME AND MT.TABLE_NAME = T.TABLE_NAME WHERE T.SCHEMA_NAME = 'SYSTEM' OR ( T.SCHEMA_NAME NOT IN ( 'SYS', 'SYSHDL', 'BROKER_PO_USER', 'BROKER_USER', 'HANA_XS_BASE', 'UIS', 'PUBLIC' ) AND T.SCHEMA_NAME NOT LIKE 'SYS$_%' ESCAPE '$' AND T.SCHEMA_NAME NOT LIKE '$_SYS$_%' ESCAPE '$' AND T.SCHEMA_NAME NOT LIKE 'SYSHDL$_%' ESCAPE '$' AND T.SCHEMA_NAME NOT LIKE 'SAP$_%' ESCAPE '$' AND T.SCHEMA_NAME NOT LIKE '$_SAP$_%' ESCAPE '$' AND T.SCHEMA_NAME NOT LIKE 'PAL$_%' ESCAPE '$' ) ) UNION ( SELECT V.COMMENTS AS REMARKS, V.CREATE_TIME, CAST(V.DEFINITION AS NVARCHAR) AS VIEW_DEFINITION, 'DEFAULT' AS TABLE_CAT, V.SCHEMA_NAME AS TABLE_SCHEM, V.VIEW_NAME AS TABLE_NAME, V.VIEW_OID AS TABLE_OID, 'VIEW' AS TABLE_TYPE, NULL AS HAS_PARTITIONS, NULL AS ROW_COUNT, NULL AS BYTES, NULL AS AUTO_MERGE_ON, NULL AS AUTO_OPTIMIZE_COMPRESSION_ON, NULL AS COMMIT_ACTION, NULL AS COMPRESSED_EXTKEY, NULL AS FIXED_PART_SIZE, V.HAS_MASKED_COLUMNS, NULL AS HAS_PRIMARY_KEY, NULL AS HAS_RECORD_COMMIT_TIMESTAMP, V.HAS_STRUCTURED_PRIVILEGE_CHECK, NULL AS HAS_TEXT_FIELDS, NULL AS IS_COLUMN_TABLE, NULL AS IS_INSERT_ONLY, NULL AS IS_LOGGED, NULL AS IS_PARTIAL_PRELOAD, NULL AS IS_PRELOAD, NULL AS IS_PUBLIC, NULL AS IS_REPLICA, NULL AS IS_REPLICATION_LOG_ENABLED, NULL AS IS_SYSTEM_TABLE, NULL AS IS_TEMPORARY, NULL AS IS_USER_DEFINED_TYPE, NULL AS PERSISTENT_MEMORY, NULL AS ROW_ORDER_TYPE, NULL AS TEMPORAL_TYPE, NULL AS TEMPORARY_TABLE_TYPE, NULL AS UNLOAD_PRIORITY, NULL AS USES_DIMFN_CACHE, NULL AS USES_EXTKEY, NULL AS USES_QUEUE_TABLE FROM SYS.VIEWS V WHERE V.SCHEMA_NAME = 'SYSTEM' OR ( V.SCHEMA_NAME NOT IN ( 'SYS', 'SYSHDL', 'BROKER_PO_USER', 'BROKER_USER', 'HANA_XS_BASE', 'UIS', 'PUBLIC' ) AND V.SCHEMA_NAME NOT LIKE 'SYS$_%' ESCAPE '$' AND V.SCHEMA_NAME NOT LIKE '$_SYS$_%' ESCAPE '$' AND V.SCHEMA_NAME NOT LIKE 'SYSHDL$_%' ESCAPE '$' AND V.SCHEMA_NAME NOT LIKE 'SAP$_%' ESCAPE '$' AND V.SCHEMA_NAME NOT LIKE 'PAL$_%' ESCAPE '$' ) ); COLUMN_COMPANION_SQL: | SELECT 'DEFAULT' AS TABLE_CAT, T.SCHEMA_NAME AS TABLE_SCHEM, T.TABLE_NAME, TC.COLUMN_NAME, TC.COMMENTS as REMARKS, TC.DATA_TYPE_NAME as TYPE_NAME, TC.POSITION as ORDINAL_POSITION, TC.DEFAULT_VALUE AS COLUMN_DEFAULT, TC.IS_NULLABLE AS IS_NULLABLE, TC.SCALE AS NUMERIC_SCALE, TC.LENGTH AS CHARACTER_MAXIMUM_LENGTH, IC.CONSTRAINT AS CONSTRAINT_TYPE, RC.REFERENCED_COLUMN_NAME AS REFERENCED_COLUMN_NAME, TC.* FROM SYS.TABLE_COLUMNS TC LEFT JOIN SYS.TABLES T ON T.SCHEMA_NAME = TC.SCHEMA_NAME AND T.TABLE_NAME = TC.TABLE_NAME LEFT JOIN SYS.TABLE_COLUMNS_ODBC TCO ON TCO.SCHEMA_NAME = TC.SCHEMA_NAME AND TCO.TABLE_NAME = TC.TABLE_NAME AND TCO.COLUMN_NAME = TC.COLUMN_NAME LEFT JOIN SYS.INDEX_COLUMNS IC ON IC.SCHEMA_NAME = TC.SCHEMA_NAME AND IC.TABLE_NAME = TC.TABLE_NAME AND IC.COLUMN_NAME = TC.COLUMN_NAME LEFT JOIN SYS.REFERENTIAL_CONSTRAINTS RC ON RC.SCHEMA_NAME = TC.SCHEMA_NAME AND RC.TABLE_NAME = TC.TABLE_NAME AND RC.COLUMN_NAME = TC.COLUMN_NAME WHERE T.SCHEMA_NAME = 'SYSTEM' OR ( T.SCHEMA_NAME NOT IN ( 'SYS', 'SYSHDL', 'BROKER_PO_USER', 'BROKER_USER', 'HANA_XS_BASE', 'UIS', 'PUBLIC' ) AND V.SCHEMA_NAME NOT LIKE 'SYS$_%' ESCAPE '$' AND V.SCHEMA_NAME NOT LIKE '$_SYS$_%' ESCAPE '$' AND V.SCHEMA_NAME NOT LIKE 'SYSHDL$_%' ESCAPE '$' AND V.SCHEMA_NAME NOT LIKE 'SAP$_%' ESCAPE '$' AND V.SCHEMA_NAME NOT LIKE 'PAL$_%' ESCAPE '$' ); EXTRA_COMPANION_SQL: | { "procedures": "SELECT 'DEFAULT' as PROCEDURE_CAT, SCHEMA_NAME as PROCEDURE_SCHEM, SCHEMA_NAME as TABLE_SCHEM, PROCEDURE_NAME, PROCEDURE_TYPE, CAST(DEFINITION AS NVARCHAR) ROUTINE_DEFINITION FROM SYS.PROCEDURES WHERE SCHEMA_NAME NOT IN( 'SYSTEM', 'SYS', 'SYSHDL', 'BROKER_USER', 'BROKER_PO_USER', 'HANA_XS_BASE', 'UIS', 'PUBLIC' ) AND SCHEMA_NAME NOT LIKE 'SYS$_%' ESCAPE '$' AND SCHEMA_NAME NOT LIKE '$_SYS$_%' ESCAPE '$' AND SCHEMA_NAME NOT LIKE 'SYSHDL$_%' ESCAPE '$' AND SCHEMA_NAME NOT LIKE 'SAP$_%' ESCAPE '$' AND SCHEMA_NAME NOT LIKE 'PAL$_%' ESCAPE '$'", "calc": "SELECT 'DEFAULT' AS PROCEDURE_CAT, a.PACKAGE_ID AS PROCEDURE_SCHEM, a.PACKAGE_ID AS TABLE_SCHEM, a.OBJECT_NAME AS PROCEDURE_NAME, a.OBJECT_SUFFIX AS PROCEDURE_TYPE, a.VERSION_ID, a.ACTIVATED_AT, a.ACTIVATED_BY, CAST(a.CDATA AS NVARCHAR) AS ROUTINE_DEFINITION, b.COLUMN_SQL_TYPE, b.COLUMN_NAME FROM _SYS_REPO.ACTIVE_OBJECT a JOIN _SYS_BI.BIMC_PROPERTIES b ON a.OBJECT_NAME = b.CUBE_NAME WHERE a.OBJECT_SUFFIX = 'calculationview' AND b.COLUMN_FLAG = 'Dimension Attribute'" } DATABASE_COMPANION_SQL: "" DYNAMIC_QUERY_CONFIG: "{}" DEFAULT_DATABASE_NAME: "DEFAULT" DEFAULT_SCHEMA_NAME: HAS_DATABASE_ABSTRACTION: "false" HOST: PORT: 30015 USERNAME: PASSWORD: # Create a separate service for each database connection: services: # Example PostgreSQL connection psql-example: <<: *extract environment: <<: *psql # Change these parameters to access your PostgreSQL database: USERNAME: MY-DATABASE-USERNAME PASSWORD: MY-DATABASE-PASSWORD HOST: POSTGRES-HOST DATABASE: MY-DATABASE-NAME # Optional filters, make sure to either change them or completely remove them. INCLUDE_FILTER_TEMPLATE: '{"DB1": ["SCHEMA1", "SCHEMA2"]}' EXCLUDE_FILTER_TEMPLATE: '{"DB2": ["SCHEMA1", "SCHEMA2"]}' # Set to true to fetch only the schemas selected in the include filter above USE_SOURCE_SCHEMA_FILTERING: "false" # Set to false to disable JDBC internal methods USE_JDBC_INTERNAL_METHODS: "true" volumes: - *shared-jdbc-drivers # Specify suitable output folder here - ./output/psql-example:/output # Example MySQL connection mysql-example: <<: *extract environment: <<: *mysql # Change these parameters to access your MySQL database: USERNAME: MY-DATABASE-USERNAME PASSWORD: MY-DATABASE-PASSWORD HOST: MYSQL-HOST # Optional filters, make sure to either change them or completely remove them. INCLUDE_FILTER_TEMPLATE: '{"DB1": ["SCHEMA1", "SCHEMA2"]}' EXCLUDE_FILTER_TEMPLATE: '{"DB2": ["SCHEMA1", "SCHEMA2"]}' # Set to true to fetch only the schemas selected in the include filter above USE_SOURCE_SCHEMA_FILTERING: "false" # Set to false to disable JDBC internal methods USE_JDBC_INTERNAL_METHODS: "true" volumes: - *shared-jdbc-drivers # Specify suitable output folder here - ./output/mysql-example:/output # Example Oracle DB connection oracledb-example: <<: *extract environment: <<: *oracledb # Change these parameters to access your Oracle database: USERNAME: MY-DATABASE-USERNAME PASSWORD: MY-DATABASE-PASSWORD HOST: ORACLE-HOST DEFAULT_DATABASE_NAME: MY-DEFAULT-DATABASE-NAME SERVICE: MY-ORACLE-INSTANCE-SID # Will be used only when credentials are stored in a secret. The values stored in secret will take precedence over the above credential values CREDENTIAL_SECRET_PATH: "/run/secrets/oracle_credentials" # Optional filters, make sure to either change them or completely remove them. INCLUDE_FILTER_TEMPLATE: '{"DB1": ["SCHEMA1", "SCHEMA2"]}' EXCLUDE_FILTER_TEMPLATE: '{"DB2": ["SCHEMA1", "SCHEMA2"]}' # Set to true to fetch only the schemas selected in the include filter above USE_SOURCE_SCHEMA_FILTERING: "false" # Set to false to disable JDBC internal methods USE_JDBC_INTERNAL_METHODS: "true" volumes: - *shared-jdbc-drivers # Specify suitable output folder here - ./output/oracledb-example:/output # Example Teradata DB connection teradatadb-example: <<: *extract environment: <<: *teradatadb # Change these parameters to access your Teradata database: USERNAME: MY-DATABASE-USERNAME PASSWORD: MY-DATABASE-PASSWORD HOST: Teradata-HOST # Set to false to disable JDBC internal methods USE_JDBC_INTERNAL_METHODS: "true" # Teradata does not have the concept of a database, only the schemas # Hence, "DEFAULT" is to be used for the database in the filter template INCLUDE_FILTER_TEMPLATE: '{"DEFAULT":["TERA_DB_1","TERA_DB_2"]}' EXCLUDE_FILTER_TEMPLATE: '{}' volumes: - *shared-jdbc-drivers # Specify suitable output folder here - ./output/teradatadb-example:/output # Example Teradata DB connection teradatadb-ldap-example: <<: *extract environment: <<: *teradatadb-ldap # Change these parameters to access your Teradata database: USERNAME: MY-DATABASE-USERNAME PASSWORD: MY-DATABASE-PASSWORD HOST: Teradata-HOST # In Teradata, DATABASE <=> SCHEMA. Hence, the terms Database and Schema are used interchangeably DATABASE: MY-DATABASE-DATABASE # Set to false to disable JDBC internal methods USE_JDBC_INTERNAL_METHODS: "true" # Teradata does not have the concept of a database, only the schemas # Hence, "DEFAULT" is to be used for the database in the filter template INCLUDE_FILTER_TEMPLATE: '{"DEFAULT":["TERA_DB_1","TERA_DB_2"]}' EXCLUDE_FILTER_TEMPLATE: '{}' volumes: - *shared-jdbc-drivers # Specify suitable output folder here - ./output/teradatadb-ldap-example:/output # Example MSSQL Server connection mssql-example: <<: *extract environment: <<: *mssql # Change these parameters to access your MSSQL Server database: USERNAME: MY-DATABASE-USERNAME PASSWORD: MY-DATABASE-PASSWORD HOST: MSSQL-HOST DATABASE: MY-DATABASE-NAME # Optional filters, make sure to either change them or completely remove them. INCLUDE_FILTER_TEMPLATE: '{"DB1": ["SCHEMA1", "SCHEMA2"]}' EXCLUDE_FILTER_TEMPLATE: '{"DB2": ["SCHEMA1", "SCHEMA2"]}' # Set to true to fetch only the schemas selected in the include filter above USE_SOURCE_SCHEMA_FILTERING: "false" # Set to false to disable JDBC internal methods USE_JDBC_INTERNAL_METHODS: "true" volumes: - *shared-jdbc-drivers # Specify suitable output folder here - ./output/mssql-example:/output # Example Hive Server connection hive-example: <<: *hive-extract environment: <<: *hive # Change these parameters to access your Hive instance: HOST: hive-host PORT: hive-port DEFAULT_SCHEMA: default USERNAME: my-hive-username PASSWORD: my-hive-password # Set to false to disable JDBC internal methods USE_JDBC_INTERNAL_METHODS: "true" volumes: - *shared-jdbc-drivers # Specify suitable output folder here - ./output/hive-example:/output # Example SAP HANA connection saphana-example: <<: *extract environment: <<: *saphana # Change these parameters to access your SAP HANA database: USERNAME: MY-DATABASE-USERNAME PASSWORD: MY-DATABASE-PASSWORD HOST: MY-DATABASE-HOST PORT: 39017 # Optional filters, make sure to either change them or completely remove them. INCLUDE_FILTER_TEMPLATE: '{"DEFAULT": ["SCHEMA1","SCHEMA2"]}' EXCLUDE_FILTER_TEMPLATE: '{}' # Set to true to fetch only the schemas selected in the include filter above USE_SOURCE_SCHEMA_FILTERING: "false" # Set to false to disable JDBC internal methods USE_JDBC_INTERNAL_METHODS: "true" volumes: - *shared-jdbc-drivers # Specify suitable output folder here - ./output/saphana-example:/output volumes: jars: