MaxCompute (ODPS) is widely used at many companies.
Adding support for this data source would make Redash more powerful.

Here is the extension I wrote:

import logging

from redash.query_runner import *
from redash.utils import json_dumps

logger = logging.getLogger(__name__)

try:
    from odps import ODPS
    enabled = True
except ImportError:
    enabled = False


types_map = {
    'STRING': TYPE_STRING,
    'BOOLEAN': TYPE_BOOLEAN,
    'BIGINT': TYPE_INTEGER,
    'DOUBLE': TYPE_FLOAT,
}


class Odps(BaseSQLQueryRunner):
    noop_query = "SELECT 1"

    @classmethod
    def configuration_schema(cls):
        return {
            "type": "object",
            "properties": {
                "endpoint": {
                    "type": "string"
                },
                "project": {
                    "type": "string"
                },
                "access_id": {
                    "type": "string"
                },
                "secret_access_key": {
                    "type": "string"
                },
            },
            "order": ["endpoint", "project", "access_id", "secret_access_key"],
            "required": ["endpoint", "project", "access_id", "secret_access_key"]
        }

    @classmethod
    def annotate_query(cls):
        return False

    @classmethod
    def type(cls):
        return "odps"

    @classmethod
    def enabled(cls):
        return enabled

    def _get_connection(self):
        # Build a PyODPS client from the data source configuration.
        return ODPS(
            self.configuration.get("access_id"),
            self.configuration.get("secret_access_key"),
            self.configuration.get("project"),
            endpoint=self.configuration.get("endpoint"),
        )

    def get_schema(self, get_stats=False):
        # List every table in the project along with its column names.
        connection = self._get_connection()
        schema = [
            {"name": table.name, "columns": table.schema.names}
            for table in connection.list_tables()
        ]
        return schema

    def run_query(self, query, user):
        try:
            connection = self._get_connection()
            instance = connection.execute_sql(query)

            with instance.open_reader() as reader:
                # Upper-case the PyODPS type names so they match the keys in
                # types_map; anything unknown falls back to TYPE_STRING.
                columns = [
                    {"name": name, "type": types_map.get(str(odps_type).upper(), TYPE_STRING)}
                    for name, odps_type in zip(reader._schema.names, reader._schema.types)
                ]
                rows = [dict(record) for record in reader]

            data = {'columns': columns, 'rows': rows}
            json_data = json_dumps(data)
            error = None
        except KeyboardInterrupt:
            error = "Query cancelled by user."
            json_data = None
        except Exception as e:
            error = str(e)
            json_data = None

        return json_data, error


register(Odps)
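
For the import at the top to succeed, PyODPS has to be installed in the Redash environment (it is on PyPI as pyodps, so pip install pyodps), and the module has to be somewhere Redash's query runner loader can find it. A quick standalone smoke test of the runner, using placeholder credentials and the public MaxCompute endpoint as an assumed example, would look like this:

runner = Odps({
    "endpoint": "https://service.odps.aliyun.com/api",  # placeholder endpoint
    "project": "my_project",                            # placeholder values
    "access_id": "my_access_id",
    "secret_access_key": "my_secret",
})
json_data, error = runner.run_query(Odps.noop_query, None)
print(json_data, error)  # expect one row and error None

If that works from a Python shell, wiring it into Redash should just be a matter of saving the module under redash/query_runner/ and enabling it in the query runner list.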

Thanks a lot! I also wrote a version, but yours is better :smiley:

I tried to use this extension and the connection works, but every query result is "0 rows". I guess the run_query function doesn't work. I'm terrible at programming and don't know how to debug the code. Could you give me some ideas on how to solve this? Thank you!
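
One way to narrow down the "0 rows" problem is to run the same query with PyODPS directly, outside Redash, and see whether the reader itself returns records. A minimal sketch, with placeholder credentials and endpoint:

from odps import ODPS

o = ODPS("my_access_id", "my_secret", "my_project",
         endpoint="https://service.odps.aliyun.com/api")  # placeholder values
instance = o.execute_sql("SELECT 1")
with instance.open_reader() as reader:
    print(reader.count)       # number of records the instance produced
    for record in reader:
        print(dict(record))   # mirrors what run_query builds per row

If this prints the expected rows, the problem is inside the runner (most likely in how run_query reads the records); if it also prints nothing, the problem is on the query or project side rather than in the Redash code.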
