diff --git a/index.html b/index.html index 0b2fc77..870e22d 100644 --- a/index.html +++ b/index.html @@ -1,17 +1,18 @@ +
- - - + + + - - - + + + @@ -26,15 +27,13 @@🤗
- - Dataset (Coming Soon) - - - - - - - -🤗
- - Leaderboard (Coming Soon) - - + + + + + + + arXiv (Coming Soon) + + + + + + + + + + Code + + + + + + + + +🤗
+ + Dataset (Coming Soon) + + + + + + + +🤗
+ + Leaderboard (Coming Soon) + + +We introduce MULTI: a multi-level, multi-disciplinary, and multi-type cross-modal test benchmark, aimed at
- evaluating the performance of multimodal generative large models under different conditions and scenarios. We collected and annotated more than 18K questions from exams, quizzes, textbooks, websites and other resources, most
- of which underwent at least two rounds of human annotation and checking, and three rounds of script cleaning. Some questions were manually adapted to make them more suitable for evaluating the comprehensive ability of the
- model. These questions involve four educational levels: junior high school, high school, college and social exams, covering Chinese, mathematics, English, physics, chemistry, biology, history, geography, politics,
- information technology, driving test and other disciplines and fields, including single choice, multiple choice, fill in the blank (given range and fully open), and open-ended discussions.
-
We manually selected 500 questions to form a difficult subset, which is used to evaluate the model's extreme performance. These questions often contain multiple images and formulas, test the model's comprehensive
- understanding of multiple images, and require complex and rigorous logical reasoning. The performance of this part of the data will be displayed separately on the leaderboard.
-
We tested on GPT-3.5 and open-source multimodal large models*, and the results show that even the advanced GPT-3.5 only achieved 43.28% accuracy, showing a huge room for improvement. We believe that MULTI
- will motivate the community to build the next generation of multimodal foundation models, to achieve expert-level artificial general intelligence.
-
-
* Based on v0.3.0-20231115 version of the data, tested on SC/MC/FB three question types.
-
+ We introduce MULTI: a multi-level, multi-disciplinary, and multi-type cross-modal test
+ benchmark, aimed at
+ evaluating the performance of multimodal generative large models under different conditions and
+ scenarios. We collected and annotated more than 18K questions from exams, quizzes, textbooks,
+ websites and other resources, most
+ of which underwent at least two rounds of human annotation and checking, and three rounds of script
+ cleaning. Some questions were manually adapted to make them more suitable for evaluating the
+ comprehensive ability of the
+ model. These questions involve four educational levels: junior high school, high school, college and
+ social exams, covering Chinese, mathematics, English, physics, chemistry, biology, history,
+ geography, politics,
+ information technology, driving test and other disciplines and fields, including single choice,
+ multiple choice, fill in the blank (given range and fully open), and open-ended discussions.
+
We manually selected 500 questions to form a difficult subset, which is used to evaluate the
+ model's extreme performance. These questions often contain multiple images and formulas, test the
+ model's comprehensive
+ understanding of multiple images, and require complex and rigorous logical reasoning. The
+ performance of this part of the data will be displayed separately on the leaderboard.
+
We tested on GPT-3.5 and open-source multimodal large models*, and the results
+ show that even the advanced GPT-3.5 only achieved 43.28% accuracy, showing a huge room for
+ improvement. We believe that MULTI
+ will motivate the community to build the next generation of multimodal foundation models, to achieve
+ expert-level artificial general intelligence.
+
+
* Based on v0.3.0-20231115 version of the data, tested on SC/MC/FB + three question types.
+
-
- How can I early access MULTI 🤔?
-
- Please feel free to contact (JamesZhutheThird@sjtu.edu.cn) and keep in touch with us. -
- + +
+
+ How can I early access MULTI 🤔?
+
+ Please feel free to contact (JamesZhutheThird@sjtu.edu.cn) and + keep in touch with us. +
+ +- MULTI consist of more than 18K questions and 8K images, covering 23 subjects and 4 educational levels. MULTI is one of the largest Chinese multimodal datasets in complex - scientific reasoning and image understanding. -
+ + + + ++ MULTI consists of more than 18K questions and 8K images, covering + 23 subjects and 4 educational levels. MULTI is one of the largest + Chinese multimodal datasets in complex + scientific reasoning and image understanding. +
+- Our annotation platform is designed to support editing and rendering complex MarkDown formats, and it's easy to check and update question property in detail. -
-
-
-
-
+ Our annotation platform is designed to support editing and rendering complex Markdown + formats, and it's easy to check and update question properties in detail. +
+
+
+
+
- This is an example of our question. -
+ + + + + + + + + + + + + ++ This is an example of our question. +
+- Several data annotation examples when constructing MULTI. -
++ Several data annotation examples when constructing MULTI. +
+- Several data augmentation examples when constructing MULTI. -
++ Several data augmentation examples when constructing MULTI. +
+- The example prompts used when evaluating on MULTI. -
-
-
-
-
+ The example prompts used when evaluating on MULTI. +
+
+
+
+
@misc{zhu2023multibench,
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ BibTeX
+ @misc{zhu2023multibench,
title={MULTI: Multimodal Understanding Leaderboard with Text and Images},
- author={Zichen Zhu, Yang Xu, Lu Chen, Jingkai Yang, Yichuan Ma, Yimin Sun, Hailin Wen, Jiaqi Liu, Jinyu Cai, Yingzi Ma, Liangtai Sun, Zihan Zhao, Kai Yu},
+ author={Zichen Zhu and Yang Xu and Lu Chen and Jingkai Yang and Yichuan Ma and Yiming Sun and Hailin Wen and Jiaqi Liu and Jinyu Cai and Yingzi Ma and Liangtai Sun and Zihan Zhao and Kai Yu},
year={2023},
howpublished = "\url{https://github.com/X-LANCE/MULTI-Benchmark}",
}
+
+
+
@@ -449,4 +570,4 @@ BibTeX
-
+
\ No newline at end of file
diff --git a/static/js/sort-table.js b/static/js/sort-table.js
new file mode 100644
index 0000000..98a71ed
--- /dev/null
+++ b/static/js/sort-table.js
@@ -0,0 +1,309 @@
+/**
+ * sort-table.js
+ * A pure JavaScript (no dependencies) solution to make HTML
+ * Tables sortable
+ *
+ * Copyright (c) 2013 Tyler Uebele
+ * Released under the MIT license. See included LICENSE.txt
+ * or http://opensource.org/licenses/MIT
+ *
+ * latest version available at https://github.com/tyleruebele/sort-table
+ */
+
+/**
+ * Sort the rows in a HTML Table
+ *
+ * Only the rows of the table's first TBODY are reordered. The active sort
+ * column and direction are recorded on the table as `js-sort-<col>` and
+ * `js-sort-asc` / `js-sort-desc` class names, and mirrored on
+ * `sortTable.sortCol`, `sortTable.sortDir` and `sortTable.sortFunc`, which
+ * `sortTable.compareRow` reads while sorting.
+ *
+ * @param Table The Table DOM object
+ * @param col The zero-based column number by which to sort
+ *            When omitted, the column recorded by the previous sort is reused
+ * @param dir Optional. The sort direction; pass 1 for asc; -1 for desc
+ *            When omitted, re-sorting the same column flips the direction
+ * @returns void
+ */
+function sortTable(Table, col, dir) {
+ var sortClass, i;
+
+ // get previous sort column
+ // (read back from the table's `js-sort-<n>` class, which is then removed)
+ sortTable.sortCol = -1;
+ sortClass = Table.className.match(/js-sort-\d+/);
+ if (null != sortClass) {
+ sortTable.sortCol = sortClass[0].replace(/js-sort-/, '');
+ Table.className = Table.className.replace(new RegExp(' ?' + sortClass[0] + '\\b'), '');
+ }
+ // If sort column was not passed, use previous
+ if ('undefined' === typeof col) {
+ col = sortTable.sortCol;
+ }
+
+ if ('undefined' !== typeof dir) {
+ // Accept -1 or 'desc' for descending. All else is ascending
+ sortTable.sortDir = dir == -1 || dir == 'desc' ? -1 : 1;
+ } else {
+ // sort direction was not passed, use opposite of previous
+ // (only when the same column is sorted again; a new column starts ascending)
+ sortClass = Table.className.match(/js-sort-(a|de)sc/);
+ if (null != sortClass && sortTable.sortCol == col) {
+ sortTable.sortDir = 'js-sort-asc' == sortClass[0] ? -1 : 1;
+ } else {
+ sortTable.sortDir = 1;
+ }
+ }
+ // drop the old direction class; the new one is appended below
+ Table.className = Table.className.replace(/ ?js-sort-(a|de)sc/g, '');
+
+ // update sort column
+ Table.className += ' js-sort-' + col;
+ sortTable.sortCol = col;
+
+ // update sort direction
+ Table.className += ' js-sort-' + (sortTable.sortDir == -1 ? 'desc' : 'asc');
+
+ // get sort type
+ // (taken from the `js-sort-<type>` class of the header cell in the last THEAD row)
+ if (col < Table.tHead.rows[Table.tHead.rows.length - 1].cells.length) {
+ sortClass = Table.tHead.rows[Table.tHead.rows.length - 1].cells[col].className.match(/js-sort-[-\w]+/);
+ }
+ // Improved support for colspan'd headers
+ // (a cell whose data-js-sort-colNum equals col overrides the positional lookup above)
+ for (i = 0; i < Table.tHead.rows[Table.tHead.rows.length - 1].cells.length; i++) {
+ if (col == Table.tHead.rows[Table.tHead.rows.length - 1].cells[i].getAttribute('data-js-sort-colNum')) {
+ sortClass = Table.tHead.rows[Table.tHead.rows.length - 1].cells[i].className.match(/js-sort-[-\w]+/);
+ }
+ }
+ // NOTE: if neither lookup above assigned sortClass, it still holds an earlier
+ // column/direction match, so sortFunc may become e.g. 'asc'; compareRow then
+ // falls back to 'string' because no such helper exists.
+ if (null != sortClass) {
+ sortTable.sortFunc = sortClass[0].replace(/js-sort-/, '');
+ } else {
+ sortTable.sortFunc = 'string';
+ }
+ // Set the headers for the active column to have the decorative class
+ Table.querySelectorAll('.js-sort-active').forEach(function(Node) {
+ Node.className = Node.className.replace(/ ?js-sort-active\b/, '');
+ });
+ Table.querySelectorAll('[data-js-sort-colNum="' + col + '"]:not(:empty)').forEach(function(Node) {
+ Node.className += ' js-sort-active';
+ });
+
+ // sort!
+ // Copy the live rows collection into a plain array so it can be sorted
+ var rows = [],
+ TBody = Table.tBodies[0];
+
+ for (i = 0; i < TBody.rows.length; i++) {
+ rows[i] = TBody.rows[i];
+ }
+ // sort type 'none' leaves the row order untouched
+ if ('none' != sortTable.sortFunc) {
+ rows.sort(sortTable.compareRow);
+ }
+
+ // Empty the TBODY, then re-attach the rows in their new order
+ while (TBody.firstChild) {
+ TBody.removeChild(TBody.firstChild);
+ }
+ for (i = 0; i < rows.length; i++) {
+ TBody.appendChild(rows[i]);
+ }
+}
+
+/**
+ * Comparator for Array.prototype.sort that orders two table rows using the
+ * column, direction and sort type currently stored on sortTable
+ *
+ * @param RowA A TR DOM object
+ * @param RowB A TR DOM object
+ * @returns {number} 0 when both cell values are loosely equal; otherwise
+ *   sortTable.sortDir when RowA's value is greater, or its negation
+ */
+sortTable.compareRow = function(RowA, RowB) {
+    var left, right, ordering;
+
+    // Unknown sort types fall back to the string helper
+    if (typeof sortTable[sortTable.sortFunc] != 'function') {
+        sortTable.sortFunc = 'string';
+    }
+
+    left = sortTable[sortTable.sortFunc](RowA.cells[sortTable.sortCol]);
+    right = sortTable[sortTable.sortFunc](RowB.cells[sortTable.sortCol]);
+
+    if (left == right) {
+        return 0;
+    }
+    ordering = left > right ? 1 : -1;
+    return sortTable.sortDir * ordering;
+};
+
+/**
+ * Strip all HTML tags from a cell's markup and expand size abbreviations
+ *
+ * After the tags are removed, text that starts with a decimal number directly
+ * followed by the unit `M` (millions) or `B` (billions) is replaced by the
+ * expanded number as a string, e.g. "7B" -> "7000000000", "1.5M" -> "1500000".
+ * Anything after the unit is dropped. Text in which `M`/`B` begins a word
+ * ("MiniGPT", "12 May 2020") is returned with only its tags removed.
+ *
+ * @param html Markup to clean, typically a cell's innerHTML
+ * @returns {string}
+ */
+sortTable.stripTags = function(html) {
+    // Multiplier for each recognised unit suffix
+    var unitScale = {M: 1000000, B: 1000000000};
+    // Remove tags first so values wrapped in markup (e.g. <b>7B</b>) are still recognised
+    var text = html.replace(/<\/?[a-z][a-z0-9]*\b[^>]*>/gi, '');
+    // Require at least one digit before the unit, and no letter right after it,
+    // so ordinary words beginning with M or B are never mistaken for a quantity
+    var match = /^\s*([-+]?(?:\d+\.?\d*|\.\d+))\s*([MB])(?![A-Za-z])/.exec(text);
+
+    if (null === match) {
+        return text;
+    }
+    return String(Number(match[1]) * unitScale[match[2]]);
+};
+
+/**
+ * Helper function that converts a table cell (TD) to a comparable value
+ * Parses the cell's tag-stripped innerHTML as a date and yields its
+ * timestamp; anything that cannot be parsed yields 0
+ *
+ * @param Cell A TD DOM object
+ * @returns {Number}
+ */
+sortTable.date = function(Cell) {
+    var text = sortTable.stripTags(Cell.innerHTML);
+    var parsed;
+
+    // Without the optional okDate library, rely on the built-in Date parser
+    if (typeof okDate === 'undefined') {
+        return (new Date(text)).getTime() || 0;
+    }
+
+    // okDate is available: use it for advanced Date processing
+    parsed = okDate(text);
+    if (!parsed) {
+        return 0;
+    }
+    return parsed.getTime();
+};
+
+/**
+ * Helper function that converts a table cell (TD) to a comparable value
+ * Keeps only digits, minus signs and dots from the tag-stripped innerHTML
+ * and converts what is left to a JS Number
+ *
+ * @param Cell A TD DOM object
+ * @returns {Number}
+ */
+sortTable.number = function(Cell) {
+    var text = sortTable.stripTags(Cell.innerHTML);
+    var numericChars = text.replace(/[^-\d.]/g, '');
+    return Number(numericChars);
+};
+
+/**
+ * Helper function that converts a table cell (TD) to a comparable value
+ * Lower-cases the tag-stripped innerHTML so the compare is case-insensitive
+ *
+ * @param Cell A TD DOM object
+ * @returns {String}
+ */
+sortTable.string = function(Cell) {
+    var text = sortTable.stripTags(Cell.innerHTML);
+    return text.toLowerCase();
+};
+
+/**
+ * Helper function that converts a table cell (TD) to a comparable value
+ * Uses the cell's innerHTML exactly as it is, markup included
+ *
+ * @param Cell A TD DOM object
+ * @returns {String}
+ */
+sortTable.raw = function(Cell) {
+    var markup = Cell.innerHTML;
+    return markup;
+};
+
+/**
+ * Helper function that converts a table cell (TD) to a comparable value
+ * Takes the last space-delimited token of the tag-stripped innerHTML,
+ * lower-cased
+ *
+ * @param Cell A TD DOM object
+ * @returns {String}
+ */
+sortTable.last = function(Cell) {
+    var tokens = sortTable.stripTags(Cell.innerHTML).split(' ');
+    // split() always yields at least one element, so the last index is valid
+    var finalToken = tokens[tokens.length - 1];
+    return finalToken.toLowerCase();
+};
+
+/**
+ * Helper function that converts a table cell (TD) to a comparable value
+ * Captures the lower-cased value of the first child element that exposes a
+ * `value` property; falls back to sortTable.string when no child has one
+ *
+ * @param Cell A TD DOM object
+ * @returns {String}
+ */
+sortTable.input = function(Cell) {
+    var child;
+
+    for (var i = 0; i < Cell.children.length; i++) {
+        child = Cell.children[i];
+        if ('object' == typeof child && 'undefined' != typeof child.value) {
+            // `value` is a Number on LI, METER and PROGRESS elements; coerce it
+            // to a string first so toLowerCase() cannot throw
+            return String(child.value).toLowerCase();
+        }
+    }
+
+    return sortTable.string(Cell);
+};
+
+/**
+ * Helper function for columns that must not be sorted
+ * Every cell maps to null, so all rows compare as equal
+ *
+ * @param Cell A TD DOM object (ignored)
+ * @returns null
+ */
+sortTable.none = function(Cell) {
+    var nothing = null;
+    return nothing;
+};
+
+/**
+ * Build a click handler bound to one Table and one column
+ * The handler calls sortTable() without a direction argument
+ *
+ * @param Table Table to sort
+ * @param col Column to sort by
+ * @returns {Function} Click Handler
+ */
+sortTable.getClickHandler = function(Table, col) {
+    function onHeaderClick() {
+        sortTable(Table, col);
+    }
+
+    return onHeaderClick;
+};
+
+/**
+ * Attach sortTable() calls to table header cells' onclick events
+ * If the table(s) do not have a THead node, one will be created around the
+ * first row
+ *
+ * Each sortable header cell is tagged with a `data-js-sort-colNum` attribute
+ * holding the zero-based column it sorts; colspans of all preceding header
+ * cells, including those marked "js-sort-none", count towards that number.
+ * Processed tables are marked with `data-js-sort-table` and skipped on later
+ * calls, and the default arrow styles are injected only once.
+ */
+sortTable.init = function() {
+    var THead, Tables, Handler, Cell;
+    if (document.querySelectorAll) {
+        Tables = document.querySelectorAll('table.js-sort-table');
+    } else {
+        Tables = document.getElementsByTagName('table');
+    }
+
+    for (var i = 0; i < Tables.length; i++) {
+        // Because IE<8 doesn't support querySelectorAll, skip unclassed tables
+        if (!document.querySelectorAll && null === Tables[i].className.match(/\bjs-sort-table\b/)) {
+            continue;
+        }
+
+        // Prevent repeat processing
+        if (Tables[i].attributes['data-js-sort-table']) {
+            continue;
+        }
+
+        // Ensure table has a tHead element
+        if (!Tables[i].tHead) {
+            // A table without rows has nothing to promote to a header; leave it
+            // unmarked so a later init() can pick it up once it has rows
+            if (!Tables[i].rows.length) {
+                continue;
+            }
+            THead = document.createElement('thead');
+            THead.appendChild(Tables[i].rows[0]);
+            Tables[i].insertBefore(THead, Tables[i].children[0]);
+        } else {
+            THead = Tables[i].tHead;
+        }
+
+        // Attach click events to table header
+        for (var rowNum = 0; rowNum < THead.rows.length; rowNum++) {
+            for (var cellNum = 0, colNum = 0; cellNum < THead.rows[rowNum].cells.length; cellNum++) {
+                Cell = THead.rows[rowNum].cells[cellNum];
+                // Headers marked "js-sort-none" get no handler
+                if (!Cell.className.match(/\bjs-sort-none\b/)) {
+                    // Define which column the header should invoke sorting for
+                    Cell.setAttribute('data-js-sort-colNum', colNum);
+                    Handler = sortTable.getClickHandler(Tables[i], colNum);
+                    window.addEventListener
+                        ? Cell.addEventListener('click', Handler)
+                        : window.attachEvent && Cell.attachEvent('onclick', Handler);
+                }
+                // Always advance, even for skipped headers, so the headers that
+                // follow keep pointing at their own column
+                colNum += Cell.colSpan;
+            }
+        }
+
+        // Mark table as processed
+        Tables[i].setAttribute('data-js-sort-table', 'true');
+    }
+
+    // Inject the default styles only once, even if init() is called again
+    if (sortTable.stylesInjected) {
+        return;
+    }
+    sortTable.stylesInjected = true;
+
+    // Add default styles as the first style in head so they can be easily overwritten by user styles
+    var element = document.createElement('style');
+    document.head.insertBefore(element, document.head.childNodes[0]);
+    var sheet = element.sheet;
+    sheet.insertRule('table.js-sort-table.js-sort-asc thead tr > .js-sort-active:not(.js-sort-none):after {content: "\\25b2";font-size: 0.7em;padding-left: 3px;line-height: 0.7em;}', 0);
+    sheet.insertRule('table.js-sort-table.js-sort-desc thead tr > .js-sort-active:not(.js-sort-none):after {content: "\\25bc";font-size: 0.7em;padding-left: 3px;line-height: 0.7em;}', 0);
+};
+
+// Run sortTable.init() once the page has loaded, using whichever event API exists
+if (window.addEventListener) {
+    window.addEventListener('load', sortTable.init, false);
+} else if (window.attachEvent) {
+    window.attachEvent('onload', sortTable.init);
+}
+
+// Shim for IE11's lack of NodeList.prototype.forEach
+if ("function" !== typeof NodeList.prototype.forEach) {
+    NodeList.prototype.forEach = Array.prototype.forEach;
+}
\ No newline at end of file